
rm(list=ls()); gc()
objects()

#Set working directory (CHANGE WITH DIRECTORY WHERE THE FILES ARE LOCATED)
setwd("J:/Projects/Recall project/Replication folder")

#install.packages("groundhog")
library("groundhog") #Allows reproducibility by allowing loading of package versions as of a specific date

package.date<-"2021-10-01"
groundhog.library("lubridate", package.date) #Easy date modification
'OK' #Allows Groundhog to save chosen version of packages (without deleting existing ones)
groundhog.library("stringr", package.date)  #Easy string reading function
groundhog.library("plyr", package.date) #Rbind dataframes with some columns missing
groundhog.library("ggplot2", package.date) #Graphs
groundhog.library("lfe", package.date) #Two-way fixed effects
groundhog.library("gridExtra", package.date) #Arrange graphs together
groundhog.library("ggrepel", package.date) #Non-overlapping labels in plot
groundhog.library("Rcpp", package.date) #Forggrepel
groundhog.library("xtable", package.date) #Latex table
groundhog.library("data.table", package.date) #Latex table
groundhog.library("papeR", package.date) #Summary stats
groundhog.library("gtools", package.date)


#### FIGURE 1: NUMBER AND SHARE OF VEHICLES RESOLD AND RECALLED PER MONTH ####


rm(list=ls()); gc()
objects()




load("Registration_recalls_merged.RData")
load("Link_Recalls_Date.RData")
#Remove cars exported before starting of our period
i<-(is.na(Main$exported_last) & Main$exportindicator=="Nee") | str_detect(Main$exported_last, "^2019") | str_detect(Main$exported_last, "^2018") | str_detect(Main$exported_last, "^2017-11") | str_detect(Main$exported_last, "^2017-12")
table(i)/nrow(Main)
Main<-subset(Main, (is.na(Main$exported_last) & Main$exportindicator=="Nee") | str_detect(exported_last, "^2019") | str_detect(exported_last, "^2018") | str_detect(exported_last, "^2017-11") | str_detect(exported_last, "^2017-12") )
#Convert to date format
Main$datumeersteafgiftenederland<-as.Date(Main$datumeersteafgiftenederland ,origin="1970-01-01", format="%d/%m/%Y")

#START CREATING MONTHLY STATISTICS
#To avoid creating a huge panel, calculate share of recalls for each month separately
date<-as.Date("2017-11-01",origin="1970-01-01", format="%Y-%m-%d")
#Check if first registration in NL occurred before date
i1<-date>=Main$datumeersteafgiftenederland 
#Check if car exported before date
i2<-is.na(Main$exported_last)==FALSE & date>=Main$exported_last
#Check if car scrapped/stolen before date
i3<-is.na(Main$scrappage_date)==FALSE & date>=Main$scrappage_date
#Keep if in NL in that month, not exported/scrapped/stolen
#This is the number of cars circulating in that month
Temp<-subset(Main, i1==TRUE & i2==FALSE & i3==FALSE)


Temp2<-unique(subset(Recalls_Date, month(publicatiedatumrdw)==month(date) & year(publicatiedatumrdw)==year(date), select=c(kenteken)))
Temp2$recalled<-1

Temp3<-merge(Temp, Temp2, by="kenteken", all.x=TRUE, all.y=FALSE)

Temp3$recalled[is.na(Temp3$recalled)==TRUE]<-0

average<-mean(Temp3$recalled)
tot<-sum(Temp3$recalled)
Stats<-cbind(date, average, tot)
rm(average, tot, i1, i2, i3, Temp, Temp2, Temp3, i, date)


datelist<-c("2017-12-01", "2018-01-01","2018-02-01","2018-03-01","2018-04-01","2018-05-01","2018-06-01","2018-07-01","2018-08-01",
            "2018-09-01","2018-10-01","2018-11-01","2018-12-01","2019-01-01","2019-02-01","2019-03-01","2019-04-01")

for(d in 1:(length(datelist))) {
  #To avoid creating a huge panel, calculate share of recalls for each month separately
  date<-as.Date(datelist[d],origin="1970-01-01", format="%Y-%m-%d")
  print(paste("Month", date))
  #Check if first registration in NL occurred before date
  i1<-date>=Main$datumeersteafgiftenederland 
  #Check if car exported before date
  i2<-is.na(Main$exported_last)==FALSE & date>=Main$exported_last
  #Check if car scrapped/stolen before date
  i3<-is.na(Main$scrappage_date)==FALSE & date>=Main$scrappage_date
  #Keep if in NL in that month, not exported/scrapped/stolen
  #This is the number of cars circulating in that month
  Temp<-subset(Main, i1==TRUE & i2==FALSE & i3==FALSE)
  
  
  Temp2<-unique(subset(Recalls_Date, month(publicatiedatumrdw)==month(date) & year(publicatiedatumrdw)==year(date), select=c(kenteken)))
  Temp2$recalled<-1
  
  Temp3<-merge(Temp, Temp2, by="kenteken", all.x=TRUE, all.y=FALSE)
  
  Temp3$recalled[is.na(Temp3$recalled)==TRUE]<-0
  
  average<-mean(Temp3$recalled)
  tot<-sum(Temp3$recalled)
  
  Toadd<-cbind(date, average, tot)
  Stats<-rbind(Stats, Toadd)
  rm(average, tot, i1, i2, i3, Temp, Temp2, Temp3, Toadd, date)
}
Stats<-as.data.frame(Stats)
Stats$date<-as.Date(Stats$date,origin="1970-01-01")

Stats$average<-round(Stats$average, digits=4)
Stats<-subset(Stats, date!="2019-04-01")

#Average recall rates
summary(Stats$average)
summary(Stats$tot)

Recall_time_stats<-Stats

save(Recall_time_stats, file="Recall_time_stats.RData")

Recall_time<-ggplot(data=Stats, aes(x=date, y=tot))+
  geom_line(size=2)+
  geom_point(size=4)+
  geom_label_repel(aes(x = date, y = tot, label = paste(average*100, "%", sep="")),point.padding=unit(0, "lines"))+
  labs(x="Month", y="Vehicles recalled")+
  scale_x_date(date_labels = "%b %Y")+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5))
Recall_time
save(Recall_time, file="Recall_time.RData")
ggsave("Recall_time.pdf", plot=Recall_time, width=8, height=5)




##SALES PER MONTH NEW
rm(list=ls()); gc()
objects()
options(scipen=10000)


load("Registration_recalls_merged.RData")

Main$datumeersteafgiftenederland<-as.Date(Main$datumeersteafgiftenederland, format="%d/%m/%Y")
Main$scrappage_date<-as.Date(Main$scrappage_date, format="%Y-%m-%d")
Main$exported_last<-as.Date(Main$exported_last, format="%Y-%m-%d")


#Extract first resale date in that month/year
Main$dateres<-as.Date(str_extract(Main$datumtenaamstelling_all,"[:digit:]{2}/11/2017"), format="%d/%m/%Y")


#1: there is resale in month
#2: Car arrived in NL in that month or earlier
#3: Resale happened after date car arrived in NL  i.e. date of arrival not counted as resale
#4: Scrappage happened later
#5: Export happened later
i<-str_detect(Main$datumtenaamstelling_all, "11/2017") & 
  Main$datumeersteafgiftenederland<as.Date("01/12/2017", format="%d/%m/%Y") &
  Main$datumeersteafgiftenederland<Main$dateres &
  (is.na(Main$scrappage_date)==TRUE | Main$scrappage_date>=as.Date("01/12/2017", format="%d/%m/%Y") ) &
  (is.na(Main$exported_last)==TRUE | Main$exported_last>=as.Date("01/12/2017", format="%d/%m/%Y") )

n1<-as.numeric(table(i)["TRUE"])

#1: Car arrived in NL in that month or earlier
#2: Scrappage happened later
#3: Export happened later
i<-Main$datumeersteafgiftenederland<as.Date("01/12/2017", format="%d/%m/%Y") &
  (is.na(Main$scrappage_date)==TRUE | Main$scrappage_date>=as.Date("01/12/2017", format="%d/%m/%Y") ) &
  (is.na(Main$exported_last)==TRUE | Main$exported_last>=as.Date("01/12/2017", format="%d/%m/%Y") )

n2<-as.numeric(table(i)["TRUE"])

res<-n1
fleet<-n2
Main$dateres<-NULL
rm(n1, n2)


##
d1<-c("[:digit:]{2}/12/2017", "[:digit:]{2}/01/2018", "[:digit:]{2}/02/2018", "[:digit:]{2}/03/2018",
      "[:digit:]{2}/04/2018","[:digit:]{2}/05/2018","[:digit:]{2}/06/2018","[:digit:]{2}/07/2018",    
      "[:digit:]{2}/08/2018", "[:digit:]{2}/09/2018","[:digit:]{2}/10/2018","[:digit:]{2}/11/2018", 
      "[:digit:]{2}/12/2018", "[:digit:]{2}/01/2019", "[:digit:]{2}/02/2019", "[:digit:]{2}/03/2019")
d2<-c("12/2017", "01/2018", "02/2018", "03/2018", "04/2018","05/2018","06/2018","07/2018", "08/2018", 
      "09/2018","10/2018","11/2018", "12/2018", "01/2019", "02/2019", "03/2019")
d3<-c("01/01/2018", "01/02/2018", "01/03/2018", "01/04/2018","01/05/2018","01/06/2018","01/07/2018", "01/08/2018", 
      "01/09/2018","01/10/2018","01/11/2018", "01/12/2018", "01/01/2019", "01/02/2019", "01/03/2019", "01/04/2019")

for(j in 1:16) {
  Main$dateres<-as.Date(str_extract(Main$datumtenaamstelling_all,d1[j]), format="%d/%m/%Y")
  i<-str_detect(Main$datumtenaamstelling_all, d2[j]) & 
    Main$datumeersteafgiftenederland<as.Date(d3[j], format="%d/%m/%Y") &
    Main$datumeersteafgiftenederland<Main$dateres &
    (is.na(Main$scrappage_date)==TRUE | Main$scrappage_date>=as.Date(d3[j], format="%d/%m/%Y") ) &
    (is.na(Main$exported_last)==TRUE | Main$exported_last>=as.Date(d3[j], format="%d/%m/%Y") )
  n1<-as.numeric(table(i)["TRUE"])
  i<-Main$datumeersteafgiftenederland<as.Date(d3[j], format="%d/%m/%Y") &
    (is.na(Main$scrappage_date)==TRUE | Main$scrappage_date>=as.Date(d3[j], format="%d/%m/%Y") ) &
    (is.na(Main$exported_last)==TRUE | Main$exported_last>=as.Date(d3[j], format="%d/%m/%Y") )
  n2<-as.numeric(table(i)["TRUE"])
  res<-c(res, n1)
  fleet<-c(fleet, n2)
  Main$dateres<-NULL
  rm(n1, n2)
}

Temp<-as.data.frame(cbind(res, fleet))
Temp$share<-Temp$res/Temp$fleet
date<-c("01/11/2017","01/12/2017", "01/01/2018", "01/02/2018", "01/03/2018", "01/04/2018","01/05/2018","01/06/2018","01/07/2018", "01/08/2018", 
        "01/09/2018","01/10/2018","01/11/2018", "01/12/2018", "01/01/2019", "01/02/2019", "01/03/2019")

Temp$date<-date
Temp$date<-as.Date(Temp$date, format="%d/%m/%Y")

sum(Temp$res)/sum(Temp$fleet)

Temp$share<-round(Temp$share, digits=4)

Resale_time_stats<-Temp

save(Resale_time_stats, file="Resale_time_stats.RData")

#Get graphs together
load("Recall_time_stats.RData")

#Drop April 2019 to allow comparison with other graph

Sys.setlocale("LC_TIME", "English")
Recall_time<-ggplot(data=Recall_time_stats, aes(x=date, y=tot))+
  geom_line(size=2)+
  geom_point(size=4)+
  geom_label_repel(aes(x = date, y = tot, label = paste(average*100, "%", sep="")),point.padding=unit(0.5, "lines"))+
  labs(x="Month", y="Vehicles recalled")+
  scale_x_date(date_labels = "%b %Y", limits=c(as.Date("2017-11-01"), as.Date("2019-03-01")), breaks=c(as.Date("2018-01-01"),as.Date("2018-04-01"),as.Date("2018-07-01"),as.Date("2018-10-01"),as.Date("2019-01-01")))+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5))
Recall_time


Resale_time<-ggplot(data=Resale_time_stats, aes(x=date, y=res))+
  geom_line(size=2)+
  geom_point(size=4)+
  geom_label_repel(aes(x = date, y = res, label = paste(share*100, "%", sep="")),point.padding=unit(0.5, "lines"))+
  labs(x="Month", y="Vehicles resold")+
  scale_x_date(date_labels = "%b %Y", limits=c(as.Date("2017-11-01"), as.Date("2019-03-01")), breaks=c(as.Date("2018-01-01"),as.Date("2018-04-01"),as.Date("2018-07-01"),as.Date("2018-10-01"),as.Date("2019-01-01")))+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5))
Resale_time


Rec_Sale_Time<-grid.arrange(Recall_time, Resale_time, ncol =1, nrow = 2)
Rec_Sale_Time


ggsave("Rec_Sale_Time.pdf", plot=Rec_Sale_Time, width=8, height=10)



####FIGURE 2: EFFECT OF VEHICLE RECALLS ON RESALES: RECALLED AND NON-RECALLED VEHICLES####



rm(list=ls()); gc()
objects()


#This panel is a reduced version of the full panel, including only versions with at least one recall
#and no recalls before Nov 2017
load("Panel.RData")

nrow(unique(subset(Panel, select=kenteken)))


#FOUR GROUPS
#RECALLED BEFORE RECALL
#RECALLED AFTER RECALL
#NOT RECALLED BEFORE PLACEBO RECALL
#RECALLED AFTER PLACEBO RECALL

#Get the modal date of recall for each version
#This is necessary in case the same recall is not notified at the same time for all recalls
#In this case most frequent date = placebo recall date for non-recalled vehicles in that version
Panel$num<-1
Temp<-aggregate(num ~ recall_new + id_ver, data=Panel, FUN=sum, subset=is.na(recall_new)==FALSE )
Temp1<-as.data.frame(unlist(by(Temp$num, Temp$id_ver, FUN=max, simplify=FALSE), use.names=TRUE))
Temp1$id_ver<-row.names(Temp1)
names(Temp1)<-c("maxval" ,"id_ver")
Temp2<-merge(Temp, Temp1, by="id_ver", all.x=TRUE, all.y=FALSE)
Temp2$ok<-Temp2$num==Temp2$maxval
Temp2$placeborec<-Temp2$recall_new
Temp2<-subset(Temp2, ok==TRUE, select=c(id_ver, placeborec))

#Add placebo recall date
Panel<-merge(Panel, Temp2, by="id_ver", all.x=TRUE, all.y=FALSE)

#Generate distance from recall
Panel$Placebo_rec_new_dist<-((year(Panel$date)*12)+(month(Panel$date)))-((year(Panel$placeborec)*12)+month(Panel$placeborec))
#Of course, if car had an actual recall, the distance is taken from the distance that was already calculated earlier
Panel$Placebo_rec_new_dist[is.na(Panel$recall_new)==FALSE]<-Panel$rec_new_dist_both[is.na(Panel$recall_new)==FALSE]

#Define post-treatment period and treatment group
Panel$treat<-0
Panel$treat[Panel$Placebo_rec_new_dist>0]<-1
Panel$rec<-0
Panel$rec[is.na(Panel$recall_new)==FALSE]<-1

#Get unweighted average share by version i.e. average of average version share of monthly resales
TempShare<-aggregate(resale ~ Placebo_rec_new_dist + rec + id_ver , data=Panel, FUN="mean"  )
TempShare1<-aggregate(resale ~ Placebo_rec_new_dist + rec, data=TempShare, FUN="mean"  )

TempShare1$recname<-""
TempShare1$recname[TempShare1$rec==0]<-"Non-Recalled"
TempShare1$recname[TempShare1$rec==1]<-"Recalled"
TempShare1$Placebo_rec_new_dist<-TempShare1$Placebo_rec_new_dist-1


TempShare1<-subset(TempShare1, Placebo_rec_new_dist>=-14 & Placebo_rec_new_dist<=14)
#TempShare1<-subset(TempShare1, Placebo_rec_new_dist>=-5 & Placebo_rec_new_dist<=5)

DID_graph<-ggplot(data=TempShare1, aes(x=Placebo_rec_new_dist, y=resale , group = recname))+
  geom_line(size=2, aes(linetype=as.character(recname ))) +
  geom_vline(xintercept=0)+
  labs(x="Treatment month", y="Share of resales", linetype="")+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5)) + 
  theme(legend.position="bottom")
DID_graph
ggsave("DID_graph.pdf", plot=DID_graph, width=8, height=5)

####FIGURE 3: PARALLEL TREND TEST####


rm(list=ls()); gc()
objects()

load("Panel_all_complete.RData")

Panel_all<-subset(Panel_all, select=c(resale, rec_precat2, rec_precat3, rec_precat4, rec_postcat1, rec_postcat2, rec_postcat3, rec_postcat4, recalled, date, age_car_d, difflastsaleyr_d, id_ver, id_type_date, id_ver_rec, id_ver_date ))

# PRE-TREND TEST, VERSION FE
reg.pretrend1<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend1)

#Put in graph
conf95_1<-confint(reg.pretrend1, level=0.95)
conf95<-subset(conf95_1, str_detect(row.names(conf95_1),"rec_.*?"))
conf90_1<-confint(reg.pretrend1, level=0.90)
conf90<-subset(conf90_1, str_detect(row.names(conf90_1),"rec_.*?"))
coef_1<-coef(reg.pretrend1)
coef<-subset(coef_1, str_detect(names(coef_1),"rec_.*?"))
save(list=c("coef_1", "conf95_1", "conf90_1"), file="Coeftrend_1.RData" )


graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)


graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
graph1<-graph

paralleltest1<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  #geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.026, +0.025) +
  ggtitle("Version FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
rm(reg.pretrend1)
gc()
 



print("PRE-TREND TEST, VERSION BY RECALL STATUS FE")
reg.pretrend2<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_rec | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend2)

#Put in graph
conf95_2<-confint(reg.pretrend2, level=0.95)
conf95<-subset(conf95_2, str_detect(row.names(conf95_2),"rec_.*?"))
conf90_2<-confint(reg.pretrend2, level=0.90)
conf90<-subset(conf90_2, str_detect(row.names(conf90_2),"rec_.*?"))
coef_2<-coef(reg.pretrend2)
coef<-subset(coef_2, str_detect(names(coef_2),"rec_.*?"))
save(list=c("coef_2", "conf95_2", "conf90_2"), file="Coeftrend_2.RData" )

graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)


graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
graph2<-graph

paralleltest2<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  #geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.026, +0.025) +
  ggtitle("Version-Recall FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
rm(reg.pretrend2)
gc()




print("PRE-TREND TEST, VERSION BY RECALL STATUS FE")
reg.pretrend3<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend3)

#Put in graph
conf95_3<-confint(reg.pretrend3, level=0.95)
conf95<-subset(conf95_3, str_detect(row.names(conf95_3),"rec_.*?"))
conf90_3<-confint(reg.pretrend3, level=0.90)
conf90<-subset(conf90_3, str_detect(row.names(conf90_3),"rec_.*?"))
coef_3<-coef(reg.pretrend3)
coef<-subset(coef_3, str_detect(names(coef_3),"rec_.*?"))
save(list=c("coef_3", "conf95_3", "conf90_3"), file="Coeftrend_3.RData" )

graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)
graph3<-graph

graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))

paralleltest3<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  # geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.026, +0.025) +
  ggtitle("Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
rm(reg.pretrend3)
gc()







# #PRE-TREND TEST, VERSION BY RECALL STATUS AND VERSION BY TIME FE
reg.pretrend4<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend4)

#Put in graph
conf95_4<-confint(reg.pretrend4, level=0.95)
conf95<-subset(conf95_4, str_detect(row.names(conf95_4),"rec_.*?"))
conf90_4<-confint(reg.pretrend4, level=0.90)
conf90<-subset(conf90_4, str_detect(row.names(conf90_4),"rec_.*?"))
coef_4<-coef(reg.pretrend4)
coef<-subset(coef_4, str_detect(names(coef_4),"rec_.*?"))
save(list=c("coef_4", "conf95_4", "conf90_4"), file="Coeftrend_4.RData" )

graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)


graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))

graph4<-graph

paralleltest4<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  # geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.03, +0.025) +
  ggtitle("Version-Recall and Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
rm(reg.pretrend4)
gc()



graph<-graph1
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest1<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(-0.03, +0.025) +
  ggtitle("Version FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest1
graph<-graph2
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest2<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(-0.03, +0.025) +
  ggtitle("Version-Recall FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest2
graph<-graph3
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest3<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(-0.03, +0.025) +
  ggtitle("Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest3
graph<-graph4
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest4<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(-0.03, +0.025) +
  ggtitle("Version-Recall, Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest4


# 

# 
#SAVE GRAPHS AND PUT TOGETHER
paralleltest<-grid.arrange(paralleltest1, paralleltest2,paralleltest3, paralleltest4 , ncol = 2, nrow = 2)
paralleltest
ggsave("Paralleltest.pdf", plot=paralleltest, width=9, height=5)


####TABLE 1: SUMMARY STATISTICS BY VEHICLE VERSION, RECALLED AND NON-RECALLED VEHICLES####


#First repeat the same procedure used to get the vehicle sample for Panel.RData
rm(list=ls()); gc()
objects()


load("Recalls_merged_2019.RData")
Data$kenteken<-as.character(Data$kenteken)

#Remove 2284 duplicate cars
Data$dupl<-duplicated(Data$kenteken)
kentekendupl<-subset(Data, dupl==TRUE, select=kenteken)
Data<-subset(Data, !c(kenteken %in% kentekendupl$kenteken) )
Data$dupl<-NULL


#Prepare for counting variables
Data$num<-1
#Is the car recalled AND only from Nov 2017?
Data$rec<-0
Data$rec[is.na(Data$referentiecoderdw)==FALSE & Data$publicatiedatumrdw>="2017-11-01"]<-1




#Calculate number of total cars and recently recalled cars within each type-variant-version
Temp<-aggregate(cbind(num, rec) ~ typegoedkeuringsnummer + variant + uitvoering, data=Data, FUN="sum")
#Version ok if:
#1) Total cars > recalled cars
#2) At least one recalled car
#3) Type approval code present
#4) No cars with recalls BEFORE Nov 2017

Temp$ok<- Temp$num>Temp$rec & Temp$rec>0 & Temp$typegoedkeuringsnummer!=""
table(Temp$ok)




#Create unique identifier for version
varvariantver<-paste(Temp$typegoedkeuringsnummer[Temp$ok==TRUE],Temp$variant[Temp$ok==TRUE],Temp$uitvoering[Temp$ok==TRUE], sep="_")
Data$typevarver<-paste(Data$typegoedkeuringsnummer,Data$variant, Data$uitvoering, sep="_")

#Does the version belong to those with sufficient variation in recalls?
Data$var_type_var_ver1<-Data$typevarver %in% varvariantver


#Remove unnecessary variables
rm(varvariantver, Temp, kentekendupl)
Data$typevarver<-NULL
Data$num<-NULL
Data$rec<-NULL


#Get only cars belonging to variants with sufficient variation in recalls
Data<-subset(Data, var_type_var_ver1==TRUE )





load("Link_Char.RData")
Link_Char<-subset(Link_Char, select=-c(typegoedkeuringsnummer, variant, uitvoering))

Data<-merge(Data, Link_Char, by="kenteken", all.x=TRUE, all.y=FALSE)
Data$rec<-0
Data$rec[is.na(Data$recall_new)==FALSE]<-1

Data$id_ver<-as.numeric(as.factor(paste(Data$typegoedkeuringsnummer, Data$variant, Data$uitvoering, sep="_")))

#Get age in months at Apr 2019
Data$age<-((4+2019*12)-(month(Data$datumeerstetoelating)+year(Data$datumeerstetoelating)*12))
#Time in months from recall and import in NL
Data$time_rec<-(month(Data$recall_new)+year(Data$recall_new)*12)-(month(Data$datumeersteafgiftenederland)+year(Data$datumeersteafgiftenederland)*12)
#Difference in month from RDW publication date of recall and recall status change
Data$diff_rec<-(month(Data$publicatiedatumrdw)+year(Data$publicatiedatumrdw)*12)-(month(Data$recall_new)+year(Data$recall_new)*12)


table(Data$time_rec<=0)/nrow(subset(Data,rec==1))

summary(Data$diff_rec)




Stats<-aggregate(cbind(brutobpm, aantalzitplaatsen, aantalcilinders, cilinderinhoud, massaledigvoertuig, toegestanemaximummassavoertuig,
                       massarijklaar, catalogusprijs, aantaldeuren, age) ~ id_ver + rec, data=Data, FUN=mean)



tround<-function(x,r) {format(round(x,r),nsmall=r)}


temp<-t.test(catalogusprijs ~ rec, data=Stats)
tt<-data.table("List price", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep=""))
temp<-t.test(aantalzitplaatsen ~ rec, data=Stats)
tt<-rbind(tt,data.table("Num. seats", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(aantaldeuren ~ rec, data=Stats)
tt<-rbind(tt,data.table("Num. doors", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(aantalcilinders ~ rec, data=Stats)
tt<-rbind(tt,data.table("Num. cylinders", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(cilinderinhoud ~ rec, data=Stats)
tt<-rbind(tt,data.table("Engine displacement", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(massaledigvoertuig ~ rec, data=Stats)
tt<-rbind(tt,data.table("Mass empty vehicle", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(massarijklaar ~ rec, data=Stats)
tt<-rbind(tt,data.table("Mass on road", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(toegestanemaximummassavoertuig ~ rec, data=Stats)
tt<-rbind(tt,data.table("Max. mass allowed", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))
temp<-t.test(age ~ rec, data=Stats)
tt<-rbind(tt,data.table("Age (months)", temp$estimate[1], temp$estimate[2], paste(tround(temp$p.value,3), stars.pval(temp$p.value), sep="")))


print(xtable(tt),include.rownames=FALSE, include.colnames=FALSE,only.contents= TRUE, hline.after=NULL,
      file="Stats_version.tex")

#### TABLE 2: OVERALL EFFECT OF RECALLS ####


rm(list=ls()); gc()
objects()



load("Panel_all_complete.RData")


nrow(Panel_all)
length(unique(Panel_all$kenteken))


print("BASIC REGRESSION, VERSION FE")
reg.simple1<- felm(resale ~ rec_new_t + recalled + date + age_car_d + difflastsaleyr_d| id_ver | 0 | id_type_date, data = Panel_all)
summary(reg.simple1)
rm(reg.simple1)
gc()

print("BASIC REGRESSION, VERSION-RECALL FE")
reg.simple2<- felm(resale ~ rec_new_t + recalled + date + age_car_d + difflastsaleyr_d|  id_ver_rec | 0 | id_type_date, data = Panel_all)
summary(reg.simple2)
rm(reg.simple2)
gc()

print("BASIC REGRESSION, VERSION-TIME FE")
reg.simple3<- felm(resale ~ rec_new_t + recalled + date + age_car_d + difflastsaleyr_d|  id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.simple3)
rm(reg.simple3)
gc()

print("BASIC REGRESSION, VERSION-RECALL + VERSION-TIME FE")
reg.simple4<- felm(resale ~ rec_new_t + recalled + date + age_car_d + difflastsaleyr_d|  id_ver_rec + id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.simple4)
rm(reg.simple4)
gc()


#### TABLE 3: RESULTS BY REAL VEHICLE LISTED PRICE, SEPARATE REGRESSIONS ####


rm(list=ls()); gc()
objects()


load("Panel_all_complete.RData")

#Remove variables with no price info
Panel_all<-subset(Panel_all, is.na(realprice)==FALSE)

#Get variation in recalls (e.g. change in recall status for part of vehicles in variants during sample period)
Temp1<-subset(Panel_all, date=="2019-03-01" & realprice<30000)
Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
nrow(Temp3)/nrow(Temp2)

Temp1<-subset(Panel_all, date=="2019-03-01" & realprice>=30000)
Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
nrow(Temp3)/nrow(Temp2)



Temp<-subset(Panel_all, Panel_all$realprice<30000)

print("PRICE <30K REGRESSION,  VERSION FE")
reg.price1<- felm(resale ~ rec_new_t + recalled + date  + age_car_d + difflastsaleyr_d  | id_ver | 0 | id_type_date, data = Temp)
summary(reg.price1)
rm(reg.price1)
gc()


print("PRICE <30K REGRESSION, VERSION BY RECALLED FE")
reg.price2<- felm(resale ~ rec_new_t + date  + age_car_d + difflastsaleyr_d | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.price2)
rm(reg.price2)
gc()



print("PRICE <30K REGRESSION, VERSION BY TIME FE")
reg.price3<- felm(resale ~ rec_new_t + recalled + age_car_d + difflastsaleyr_d | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.price3)
rm(reg.price3)
gc() 



print("PRICE <30K REGRESSION, VERSION BY TIME + VERSION BY RECALLED FE")
reg.price4<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.price4)
rm(reg.price4)
gc()


Temp<-subset(Panel_all, realprice>=30000)
print("PRICE >=30K REGRESSION,  VERSION FE")
reg.price1<- felm(resale ~ rec_new_t + recalled + date  + age_car_d + difflastsaleyr_d  | id_ver | 0 | id_type_date, data = Temp)
summary(reg.price1)
rm(reg.price1)
gc()

print("PRICE >=20K REGRESSION, VERSION BY RECALLED FE")
reg.price2<- felm(resale ~ rec_new_t + date  + age_car_d + difflastsaleyr_d | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.price2)
rm(reg.price2)
gc()


print("PRICE >=30K REGRESSION, VERSION BY TIME FE")
reg.price3<- felm(resale ~ rec_new_t + recalled + age_car_d + difflastsaleyr_d | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.price3)
rm(reg.price3)
gc() 


print("PRICE >=30K REGRESSION, VERSION BY TIME + VERSION BY RECALLED FE")
reg.price4<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.price4)
rm(reg.price4)
gc()


#### TABLE 4: RESULTS BY APK INSPECTION DEFECTS, SEPARATE REGRESSIONS ####


rm(list=ls()); gc()
objects()


load("Panel_all_complete.RData")


#Get variation in recalls (e.g. change in recall status for part of vehicles in variants during sample period)
Temp1<-subset(Panel_all, date=="2019-03-01" & Panel_all$APKtot==0 & Panel_all$age_car>=4)
Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
nrow(Temp3)/nrow(Temp2)

Temp1<-subset(Panel_all, date=="2019-03-01" & Panel_all$APKtot>=1 & Panel_all$age_car>=4)
Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
nrow(Temp3)/nrow(Temp2)

Temp<-subset(Panel_all, Panel_all$APKtot==0 & Panel_all$age_car>=4)
length(unique(Temp$kenteken))
nrow(Temp)


print("APK 0, BASELINE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.apk0)


print("APK 0, RECALLED-VERSION FE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.apk0)

print("APK 0, TIME-VERSION FE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk0)

print("APK 0, TIME-VERSION + RECALLED-VERSION FE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk0)


Temp<-subset(Panel_all, Panel_all$APKtot>=1 & Panel_all$age_car>=4)
length(unique(Temp$kenteken))
nrow(Temp)

rm(Panel_all)
gc()
print("APK 1, BASELINE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.apk1)

print("APK 1, RECALLED-VERSION FE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.apk1)

print("APK 1, TIME-VERSION FE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk1)

print("APK 1, TIME-VERSION + RECALLED-VERSION FE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk1)

#### TABLE 5: RESULTS BY BRAND RATING, SEPARATE REGRESSIONS ####


rm(list=ls()); gc()
objects()


load("Panel_all_complete.RData")

#Get variation in recalls (e.g. change in recall status for part of vehicles in variants during sample period)
Temp1<-subset(Panel_all, date=="2019-03-01" & is.na(Panel_all$rating_merk)==FALSE & Panel_all$rating_merk<7)
Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
nrow(Temp3)/nrow(Temp2)

Temp1<-subset(Panel_all, date=="2019-03-01" & is.na(Panel_all$rating_merk)==FALSE & Panel_all$rating_merk>=7)
Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
nrow(Temp3)/nrow(Temp2)

Temp<-subset(Panel_all, is.na(Panel_all$rating_merk)==FALSE & Panel_all$rating_merk<7)
nrow(Temp)
length(as.character(unique(Temp$kenteken)))


print("RATING BELOW 7, BASELINE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)

print("RATING BELOW 7, RECALLED-VERSION FE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)

print("RATING BELOW 7, TIME-VERSION FE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)

print("RATING BELOW 7, TIME-VERSION + RECALLED-VERSION FE")
reg.pretrend5<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.pretrend5)


Temp<-subset(Panel_all, is.na(Panel_all$rating_merk)==FALSE & Panel_all$rating_merk>=7)
nrow(Temp)
length(as.character(unique(Temp$kenteken)))

print("RATING 7 AND ABOVE, BASELINE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

print("RATING 7 AND ABOVE, RECALLED-VERSION FE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

print("RATING 7 AND ABOVE, TIME-VERSION FE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

print("RATING 7 AND ABOVE, TIME-VERSION + RECALLED-VERSION FE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)


#### FIGURE A.1: MARKET SHARE BY MANUFACTURER, TOP 20 BRANDS ####



rm(list=ls()); gc()
objects()


load("Registration_recalls_merged.RData")
load("Link_Makes.RData")

#Merge main dataset with brand name
Main<-merge(Main, Makes, by="kenteken", all.x=TRUE, all.y=FALSE)
rm(Makes)
gc()

#Get total number of cars in the dataset
Main$num<-1
Temp1<-aggregate(num ~ merk, data=Main, FUN="sum" )

#Get market share
Temp1$mktshare<-Temp1$num/nrow(Main)
#Order by size
TempMerkShare<-Temp1[order(-Temp1$mktshare),]
#Get top 20 brands
TempMerkShare<-TempMerkShare[1:20,]
rm(Main)
gc()

#Create graph
Market_share_Merk<-ggplot(data=TempMerkShare, aes(x=reorder(merk, mktshare), y=mktshare))+
  geom_bar(position="dodge",stat="identity") +
  coord_flip() +
  labs(x="", y="Market share")+
  scale_y_continuous(breaks=seq(0, 0.12, by = 0.02))+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5))
Market_share_Merk
ggsave("Market_share_Merk.pdf", plot=Market_share_Merk, width=8, height=5)


#### FIGURE A.2 - SHARE OF RECALLS BY BRAND (NOV 2017 - APR 2019), TOP 20, ONLY BRANDS WITH >0.1% MARKET SHARE ####
rm(list=ls()); gc()
objects()


load("Registration_recalls_merged.RData")
load("Link_Makes.RData")

#Merge main data with brand information
Main<-merge(Main, Makes, by="kenteken", all.x=TRUE, all.y=FALSE)
rm(Makes)
gc()

#Get recall data
load("Recall_week1.RData")

#Get recalls in our sample period
Recallw$rec<-1
Recallw<-unique(subset(Recallw, is.na(recall_new)==FALSE & recall_new>="2017-11-01", select=c(kenteken, rec)))
#Merge with main data
Main<-merge(Main, Recallw, by="kenteken", all.x=TRUE, all.y=FALSE)
Main$rec[is.na(Main$rec)]<-0
Main$num<-1

#Get number of vehicles and recalls by brand
Temp<-aggregate(rec ~ merk, data=Main, FUN="sum" )
Temp1<-aggregate(num ~ merk, data=Main, FUN="sum" )

#Get market share
Temp1$mktshare<-Temp1$num/nrow(Main)
rm(Main)
gc()

#Merge num recalls and cars by brand
Temp2<-merge(Temp, Temp1, by="merk", all.x=TRUE, all.y=TRUE)


Temp2$recshare<-Temp2$rec/Temp2$num
#Drop brands with very low market share
Temp2<-subset(Temp2, mktshare>0.001)
#Get top 20 brands by share of recalled vehicles
Temp2<-Temp2[order(-Temp2$recshare),]
Temp2<-Temp2[1:20,]

#Plot share recalled vehicles
library("ggplot2") #Graphs
Recalled_share_Merk<-ggplot(data=Temp2, aes(x=reorder(merk, recshare)  , y=recshare))+
  geom_bar(position="dodge",stat="identity") +
  coord_flip()+
  labs(x="", y="Share recalled")+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5))
Recalled_share_Merk
ggsave("Recalled_share_Merk.pdf", plot=Recalled_share_Merk, width=8, height=5)

#### FIGURE A.3 - SHARE OF VEHICLES RECALLED BY YEAR OF FIRST REGISTRATION ####


rm(list=ls()); gc()
objects()


load("Registration_recalls_merged.RData")

Main$year<-as.numeric(substr(Main$datumeerstetoelating, 7,10 ))
Main$rec<-0
Main$rec[is.na(Main$rec_codes)==FALSE]<-1

Temp<-aggregate(rec ~  year, data=Main, FUN="mean")

Temp<-subset(Temp, year>=2012 & year<=2018 )


Graph<-ggplot(Temp, aes(x=year, y=rec))+
  geom_line(size=2)+
  xlab("Year of first registration") +
  ylab("Share recalled vehicles") +
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))+
  scale_y_continuous(breaks=seq(0,0.3,by=0.05))+
  scale_x_continuous(breaks=seq(2012,2018,by=1), labels=seq(2012,2018,by=1))+
  theme(panel.grid.minor = element_blank())
Graph

ggsave("Recalls_by_regyear.pdf", plot=Graph, width=8, height=5)


#### FIGURE A.4 - VEHICLE RECALLS BY NUMBER OF VEHICLES AFFECTED (LOG SCALE) ####

rm(list=ls()); gc()
objects()

options(scipen=10000)

load("Recall_info_2019.RData")


Recall_info$num<-1
Recall_info<-subset(Recall_info, totaalaantalvoertuigenterugroepactie>0)


Temp<-unique(subset(Recall_info, select=c(referentiecoderdw, totaalaantalvoertuigenterugroepactie, nationaalopgegevenaantalvoertuigenterugroepactie)))

Temp<-subset(Temp, totaalaantalvoertuigenterugroepactie>0 & nationaalopgegevenaantalvoertuigenterugroepactie>0)



#Calculate percentile of each recall
Temp$totaalaantalvoertuigenterugroepactie_qnt<-ecdf(Temp$totaalaantalvoertuigenterugroepactie)(Temp$totaalaantalvoertuigenterugroepactie)
Recall_prc_world<-ggplot(Temp, aes(x=totaalaantalvoertuigenterugroepactie_qnt, y=totaalaantalvoertuigenterugroepactie))+
  geom_point(size=2)+
  expand_limits(y = c(1, 11000000))+
  scale_y_continuous(trans='log10', breaks=c(10, 100, 1000, 10000, 100000, 1000000, 10000000), labels=c("10", expression(paste(10^{2})), expression(paste(10^{3})), expression(paste(10^{4})), expression(paste(10^{5})), expression(paste(10^{6})), expression(paste(10^{7}))) )+
  labs(x="Recall percentile", y="Vehicles affected worldwide") +
  theme(text = element_text(size=20), axis.text= element_text(size=18))
Recall_prc_world

Temp$nationaalopgegevenaantalvoertuigenterugroepactie_qnt<-ecdf(Temp$nationaalopgegevenaantalvoertuigenterugroepactie)(Temp$nationaalopgegevenaantalvoertuigenterugroepactie)
Temp<-subset(Temp, nationaalopgegevenaantalvoertuigenterugroepactie>0)
Recall_prc_nl<-ggplot(Temp, aes(x=nationaalopgegevenaantalvoertuigenterugroepactie_qnt, y=nationaalopgegevenaantalvoertuigenterugroepactie))+
  geom_point(size=2)+
  scale_y_continuous(trans='log10', breaks=c(10, 100, 1000, 10000), labels=c("10", expression(paste(10^{2})), expression(paste(10^{3})), expression(paste(10^{4})) )) +
  labs(x="Recall percentile", y="Vehicles affected in NL") +
  theme(text = element_text(size=20), axis.text= element_text(size=18))
Recall_prc_nl


Recall_prc<-grid.arrange(Recall_prc_world, Recall_prc_nl, ncol=2)
Recall_prc
ggsave("Recall_prc.pdf", plot=Recall_prc, width=9, height=5) 

#### FIGURE C.1 - EFFECT OF VEHICLE RECALLS ON RESALES: RECALLED AND NON-RECALLED VEHICLES, SAME VARIANT ####


rm(list=ls()); gc()
objects()



#The file Panel already contains the panel only for cars belonging to versions with sufficient variation in recalls (>0% and <100% cars recalled)
#And only one recall
#And only recalls from Nov 2017



load("Panel_var.RData")



nrow(unique(subset(Panel_var, select=kenteken)))

##
#FOUR GROUPS
#RECALLED BEFORE RECALL
#RECALLED AFTER RECALL
#NOT RECALLED BEFORE PLACEBO RECALL
#RECALLED AFTER PLACEBO RECALL

Panel<-Panel_var
rm(Panel_var)

#Get modal date of recall (in some cases it differs slightly within variant, even if it is the same recall)
#The modal date of recall is used as placebo recall date for non-recalled vehicles in the same version.
Panel$num<-1
Temp<-aggregate(num ~ recall_new + id_var, data=Panel, FUN=sum, subset=is.na(recall_new)==FALSE )
#This contains, for each variant, the highest number of vehicles with a date of notification 
#(if date of notification is only one, then total recalled cars = maxval )
Temp1<-as.data.frame(unlist(by(Temp$num, Temp$id_var, FUN=max, simplify=FALSE), use.names=TRUE))
Temp1$id_var<-row.names(Temp1)
names(Temp1)<-c("maxval" ,"id_var")

#Get for each variant the date of recall that appear for more cars.
Temp2<-merge(Temp, Temp1, by="id_var", all.x=TRUE, all.y=FALSE)
Temp2$ok<-Temp2$num==Temp2$maxval
Temp2$placeborec<-Temp2$recall_new
Temp2<-subset(Temp2, ok==TRUE, select=c(id_var, placeborec))

#A few versions (21) have the same number of obs for two different recall dates: for those, select the earliest date
Temp2<-aggregate(placeborec ~ id_var, data=Temp2, FUN=min)

#Add the placebo recall date to the panel
Panel<-merge(Panel, Temp2, by="id_var", all.x=TRUE, all.y=FALSE)
#Create distance from modal recall
Panel$Placebo_rec_new_dist<-((year(Panel$date)*12)+(month(Panel$date)))-((year(Panel$placeborec)*12)+month(Panel$placeborec))
#If a car was actually recalled, use the distance from actual date of recall (even if different from modal date)
Panel$Placebo_rec_new_dist[is.na(Panel$recall_new)==FALSE]<-Panel$rec_new_dist_both[is.na(Panel$recall_new)==FALSE]


Panel$rec<-0
Panel$rec[is.na(Panel$recall_new)==FALSE]<-1

#First get the average resale rate within version for each distance (separately from recalled and non recalled)
TempShare<-aggregate(resale ~ Placebo_rec_new_dist + rec + id_var , data=Panel, FUN="mean"  )
#Then get the average of the average for each distance (separately from recalled and non recalled)
TempShare1<-aggregate(resale ~ Placebo_rec_new_dist + rec, data=TempShare, FUN="mean"  )

TempShare1$recname<-""
TempShare1$recname[TempShare1$rec==0]<-"Non-Recalled"
TempShare1$recname[TempShare1$rec==1]<-"Recalled"
TempShare1$Placebo_rec_new_dist<-TempShare1$Placebo_rec_new_dist-1

TempShare1<-subset(TempShare1, Placebo_rec_new_dist>=-14 & Placebo_rec_new_dist<=14)


DID_graph<-ggplot(data=TempShare1, aes(x=Placebo_rec_new_dist, y=resale , group = recname))+
  geom_line(size=2, aes(linetype=as.character(recname ))) +
  geom_vline(xintercept=0)+
  labs(x="Treatment month", y="Share of resales", linetype="")+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5)) + 
  theme(legend.position="bottom")
DID_graph
ggsave("DID_graph_variant.pdf", plot=DID_graph, width=8, height=5)

#### FIGURE C.2 - EFFECT OF VEHICLE RECALLS ON RESALES: RECALLED AND NON-RECALLED VEHICLES, SAME MODEL



rm(list=ls()); gc()
objects()


#The file Panel already contains the panel only for cars belonging to versions with sufficient variation in recalls (>0% and <100% cars recalled)
#And only one recall
#And only recalls from Nov 2017




load("Panel_model.RData")



nrow(unique(subset(Panel_model, select=kenteken)))

##
#FOUR GROUPS
#RECALLED BEFORE RECALL
#RECALLED AFTER RECALL
#NOT RECALLED BEFORE PLACEBO RECALL
#RECALLED AFTER PLACEBO RECALL

Panel<-Panel_model
rm(Panel_model)

#Get modal date of recall (in some cases it differs slightly within variant, even if it is the same recall)
#The modal date of recall is used as placebo recall date for non-recalled vehicles in the same version.
Panel$num<-1
Temp<-aggregate(num ~ recall_new + id_model, data=Panel, FUN=sum, subset=is.na(recall_new)==FALSE )
#This contains, for each variant, the highest number of vehicles with a date of notification 
#(if date of notification is only one, then total recalled cars = maxval )
Temp1<-as.data.frame(unlist(by(Temp$num, Temp$id_model, FUN=max, simplify=FALSE), use.names=TRUE))
Temp1$id_model<-row.names(Temp1)
names(Temp1)<-c("maxval" ,"id_model")

#Get for each variant the date of recall that appear for more cars.
Temp2<-merge(Temp, Temp1, by="id_model", all.x=TRUE, all.y=FALSE)
Temp2$ok<-Temp2$num==Temp2$maxval
Temp2$placeborec<-Temp2$recall_new
Temp2<-subset(Temp2, ok==TRUE, select=c(id_model, placeborec))


#Add the placebo recall date to the panel
Panel<-merge(Panel, Temp2, by="id_model", all.x=TRUE, all.y=FALSE)
#Create distance from modal recall
Panel$Placebo_rec_new_dist<-((year(Panel$date)*12)+(month(Panel$date)))-((year(Panel$placeborec)*12)+month(Panel$placeborec))
#If a car was actually recalled, use the distance from actual date of recall (even if different from modal date)
Panel$Placebo_rec_new_dist[is.na(Panel$recall_new)==FALSE]<-Panel$rec_new_dist_both[is.na(Panel$recall_new)==FALSE]



Panel$rec<-0
Panel$rec[is.na(Panel$recall_new)==FALSE]<-1

#First get the average resale rate within version for each distance (separately from recalled and non recalled)
TempShare<-aggregate(resale ~ Placebo_rec_new_dist + rec + id_model , data=Panel, FUN="mean"  )
#Then get the average of the average for each distance (separately from recalled and non recalled)
TempShare1<-aggregate(resale ~ Placebo_rec_new_dist + rec, data=TempShare, FUN="mean"  )

TempShare1$recname<-""
TempShare1$recname[TempShare1$rec==0]<-"Non-Recalled"
TempShare1$recname[TempShare1$rec==1]<-"Recalled"
TempShare1$Placebo_rec_new_dist<-TempShare1$Placebo_rec_new_dist-1

TempShare1<-subset(TempShare1, Placebo_rec_new_dist>=-14 & Placebo_rec_new_dist<=14)


DID_graph<-ggplot(data=TempShare1, aes(x=Placebo_rec_new_dist, y=resale , group = recname))+
  geom_line(size=2, aes(linetype=as.character(recname ))) +
  geom_vline(xintercept=0)+
  labs(x="Treatment month", y="Share of resales", linetype="")+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5)) + 
  theme(legend.position="bottom")
DID_graph
ggsave("DID_graph_model.pdf", plot=DID_graph, width=8, height=5)


#### FIGURE D.1 - PARALLEL TREND TEST, NO LARGE RECALLS ####


rm(list=ls()); gc()
objects()

load("Recall_info_2019.RData")


load("Panel_all_complete.RData")


#105 large recalls out of 1779
Largerec<-subset(Recall_info, totaalaantalvoertuigenterugroepactie>=1000000 & nationaalopgegevenaantalvoertuigenterugroepactie>=10000)



Panel_all<-subset(Panel_all, select=c(resale, rec_precat2, rec_precat3, rec_precat4, rec_postcat1, rec_postcat2, rec_postcat3, rec_postcat4, recalled, date, age_car_d, difflastsaleyr_d, id_ver, id_type_date, id_ver_rec, id_ver_date, referentiecoderdw ))


#Get which versions have a large recall for some or all their cars
Temp<-subset(Panel_all, select=id_ver,  Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw )
Temp1<-as.character(unique(Temp$id_ver))
#Remove versions with large recalls (including non recalled cars)
Panel_all<-subset(Panel_all, !c(id_ver %in% Temp1)   )


length(unique(Panel_all$kenteken))

nrow(Panel_all)


# 
# #PRE-TREND TEST, VERSION FE
reg.pretrend1<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend1)

#Put in graph
conf95_1<-confint(reg.pretrend1, level=0.95)
conf95<-subset(conf95_1, str_detect(row.names(conf95_1),"rec_.*?"))
conf90_1<-confint(reg.pretrend1, level=0.90)
conf90<-subset(conf90_1, str_detect(row.names(conf90_1),"rec_.*?"))
coef_1<-coef(reg.pretrend1)
coef<-subset(coef_1, str_detect(names(coef_1),"rec_.*?"))
save(list=c("coef_1", "conf95_1", "conf90_1"), file="Coeftrend_1.RData" )


graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)


graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
graph1<-graph

paralleltest1<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  #geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.026, +0.025) +
  ggtitle("Version FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
ggsave("Paralleltest1_nolargerec.pdf", plot=paralleltest1, width=8, height=5)
#rm(reg.pretrend1)
gc()
# 



print("PRE-TREND TEST, VERSION BY RECALL STATUS FE")
reg.pretrend2<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_rec | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend2)

#Put in graph
conf95_2<-confint(reg.pretrend2, level=0.95)
conf95<-subset(conf95_2, str_detect(row.names(conf95_2),"rec_.*?"))
conf90_2<-confint(reg.pretrend2, level=0.90)
conf90<-subset(conf90_2, str_detect(row.names(conf90_2),"rec_.*?"))
coef_2<-coef(reg.pretrend2)
coef<-subset(coef_2, str_detect(names(coef_2),"rec_.*?"))
save(list=c("coef_2", "conf95_2", "conf90_2"), file="Coeftrend_2.RData" )

graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)


graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
graph2<-graph

paralleltest2<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  #geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.026, +0.025) +
  ggtitle("Version-Recall FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
ggsave("Paralleltest2_nolargerec.pdf", plot=paralleltest2, width=8, height=5)
#rm(reg.pretrend2)
gc()




print("PRE-TREND TEST, VERSION BY RECALL STATUS FE")
reg.pretrend3<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend3)

#Put in graph
conf95_3<-confint(reg.pretrend3, level=0.95)
conf95<-subset(conf95_3, str_detect(row.names(conf95_3),"rec_.*?"))
conf90_3<-confint(reg.pretrend3, level=0.90)
conf90<-subset(conf90_3, str_detect(row.names(conf90_3),"rec_.*?"))
coef_3<-coef(reg.pretrend3)
coef<-subset(coef_3, str_detect(names(coef_3),"rec_.*?"))
save(list=c("coef_3", "conf95_3", "conf90_3"), file="Coeftrend_3.RData" )

graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)
graph3<-graph

graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))

paralleltest3<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  # geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.026, +0.025) +
  ggtitle("Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
ggsave("Paralleltest3_nolargerec.pdf", plot=paralleltest3, width=8, height=5)
rm(reg.pretrend3)
gc()







# #PRE-TREND TEST, VERSION BY RECALL STATUS AND VERSION BY TIME FE
reg.pretrend4<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + rec_postcat1 + rec_postcat2 + rec_postcat3 + rec_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend4)

#Put in graph
conf95_4<-confint(reg.pretrend4, level=0.95)
conf95<-subset(conf95_4, str_detect(row.names(conf95_4),"rec_.*?"))
conf90_4<-confint(reg.pretrend4, level=0.90)
conf90<-subset(conf90_4, str_detect(row.names(conf90_4),"rec_.*?"))
coef_4<-coef(reg.pretrend4)
coef<-subset(coef_4, str_detect(names(coef_4),"rec_.*?"))
save(list=c("coef_4", "conf95_4", "conf90_4"), file="Coeftrend_4.RData" )

graph<-cbind(names(coef),coef, conf95, conf90)
graph<-rbind(graph, c("rec_precat1", 0, 0, 0, 0, 0))
graph<-cbind(graph, c(-2,-3,-4,0,1,2,3,-1))
colnames(graph)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graph<-as.data.frame(graph,stringsAsFactors=FALSE)


graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))

graph4<-graph

paralleltest4<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=3) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.2, size=0.75) +
  # geom_errorbar(aes(ymin=conf90min, ymax=conf90max), width=.1, size=0.75) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (quarters)") +
  ylab("Coefficient estimates") +
  ylim(-0.03, +0.025) +
  ggtitle("Version-Recall and Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
ggsave("Paralleltest4_nolargerec.pdf", plot=paralleltest4, width=8, height=5)
rm(reg.pretrend4)
gc()

conf95maxval<-0.005+max(as.numeric(c(graph1$conf95max, graph2$conf95max, graph3$conf95max, graph4$conf95max ) ))
conf95minval<-min(as.numeric(c(graph1$conf95min, graph2$conf95min, graph3$conf95min, graph4$conf95min ) ))-0.005

graph<-graph1
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest1<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(conf95minval, conf95maxval) +
  ggtitle("Version FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest1
graph<-graph2
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest2<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(conf95minval, conf95maxval) +
  ggtitle("Version-Recall FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest2
graph<-graph3
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest3<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(conf95minval, conf95maxval) +
  ggtitle("Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest3
graph<-graph4
graph$coef<-as.numeric(as.character(graph$coef))
graph$conf95min<-as.numeric(as.character(graph$conf95min))
graph$conf90min<-as.numeric(as.character(graph$conf90min))
graph$conf95max<-as.numeric(as.character(graph$conf95max))
graph$conf90max<-as.numeric(as.character(graph$conf90max))
graph$pos<-as.numeric(as.character(graph$pos))
paralleltest4<-ggplot(graph, aes(x=pos, y=coef)) +
  geom_point(size=2) +
  geom_errorbar(aes(ymin=conf95min, ymax=conf95max), width=.3, size=0.5) +
  geom_hline(yintercept = 0, size=0.5, linetype="dashed")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(conf95minval, conf95maxval) +
  ggtitle("Version-Recall, Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
paralleltest4


# 

# 
#SAVE GRAPHS AND PUT TOGETHER
paralleltest<-grid.arrange(paralleltest1, paralleltest2,paralleltest3, paralleltest4 , ncol = 2, nrow = 2)
paralleltest
ggsave("Paralleltest_nolargerec.pdf", plot=paralleltest, width=9, height=5)



#### TABLE D.1 - OVERALL EFFECT OF RECALL, NO LARGE RECALLS ####


rm(list=ls()); gc()
objects()


load("Recall_info_2019.RData")


#13 large recalls
Largerec<-subset(Recall_info, totaalaantalvoertuigenterugroepactie>=1000000 & nationaalopgegevenaantalvoertuigenterugroepactie>=10000)

load("Panel_all_complete.RData")



length(unique(Panel_all$kenteken[Panel_all$recalled==1 & Panel_all$recall_new>="2017/11/1"]))

length(unique(Panel_all$kenteken[Panel_all$recalled==1 & Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw & Panel_all$recall_new>="2017/11/1"]))


length(unique(Panel_all$kenteken[Panel_all$recalled==1]))

length(unique(Panel_all$kenteken[Panel_all$recalled==1 & Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw]))

#Get which versions have a large recall for some or all their cars
Temp<-subset(Panel_all, select=id_ver,  Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw )
Temp1<-as.character(unique(Temp$id_ver))
#Remove versions with large recalls (including non recalled cars)
Panel_all<-subset(Panel_all, !c(id_ver %in% Temp1)   )
length(unique(Panel_all$kenteken))


Panel_all$id_ver_rec<-as.factor(paste(Panel_all$typegoedkeuringsnummer, Panel_all$variant, Panel_all$uitvoering, Panel_all$recalled))

length(unique(Panel_all$kenteken))

nrow(Panel_all)


print("BASIC REGRESSION,  VERSION FE")
reg.simple1<- felm(resale ~ rec_new_t + recalled + date + age_car_d + difflastsaleyr_d| id_ver | 0 | id_type_date, data = Panel_all)
summary(reg.simple1)
rm(reg.simple1)
gc()



print("BASIC REGRESSION,  VERSION BY RECALLED FE")
reg.simple2<- felm(resale ~ rec_new_t  + date + age_car_d + difflastsaleyr_d|  id_ver_rec | 0 | id_type_date, data = Panel_all)
summary(reg.simple2)
rm(reg.simple2)
gc()

#BASIC REGRESSION, VERSION BY TIME  FE
print("BASIC REGRESSION, VERSION BY TIME FE")
reg.simple3<- felm(resale ~ rec_new_t + recalled + date + age_car_d + difflastsaleyr_d| id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.simple3)
rm(reg.simple3)
gc()


#BASIC REGRESSION, VERSION BY TIME + VERSION BY RECALLED FE
print("BASIC REGRESSION, VERSION BY TIME FE")
reg.simple4<- felm(resale ~ rec_new_t  + date + age_car_d + difflastsaleyr_d| id_ver_date + id_ver_rec | 0 | id_type_date, data = Panel_all)
summary(reg.simple4)
rm(reg.simple4)
gc()



#### TABLE D.2 - RESULTS BY REAL VEHICLE LISTED PRICE, SEPARATE REGRESSIONS, NO LARGE RECALLS ####


rm(list=ls()); gc()
objects()

load("Recall_info_2019.RData")


load("Panel_all_complete.RData")




Panel_all<-subset(Panel_all, is.na(realprice)==FALSE)





Largerec<-subset(Recall_info, totaalaantalvoertuigenterugroepactie>=1000000 & nationaalopgegevenaantalvoertuigenterugroepactie>=10000)


#Get which versions have a large recall for some or all their cars
Temp<-subset(Panel_all, select=id_ver,  Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw )
Temp1<-as.character(unique(Temp$id_ver))
#Remove versions with large recalls (including non recalled cars)
Panel_all<-subset(Panel_all, !c(id_ver %in% Temp1)   )

Panel_all$id_ver_rec<-as.factor(paste(Panel_all$typegoedkeuringsnummer, Panel_all$variant, Panel_all$uitvoering, Panel_all$recalled))

length(unique(Panel_all$kenteken))

nrow(Panel_all)



#Temp1<-subset(Panel_all, date=="2019-03-01" & realprice<30000)
#Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
#Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
#nrow(Temp3)/nrow(Temp2)

#Temp1<-subset(Panel_all, date=="2019-03-01" & realprice>=30000)
#Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
#Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
#nrow(Temp3)/nrow(Temp2)



Temp<-subset(Panel_all, Panel_all$realprice<30000)
length(unique(Temp$kenteken))

nrow(Temp)


#PRICE REGRESSION,  VERSION FE
print("PRICE <30K REGRESSION,  VERSION FE")
reg.pricelow<- felm(resale ~ rec_new_t + recalled + date  + age_car_d + difflastsaleyr_d  | id_ver | 0 | id_type_date, data = Temp)
summary(reg.pricelow)
rm(reg.pricelow)
gc()

#PRICE REGRESSION, VERSION + VERSION BY RECALLED FE
print("PRICE <30K REGRESSION, VERSION BY RECALLED FE")
reg.pricelow<- felm(resale ~ rec_new_t + date  + age_car_d + difflastsaleyr_d | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.pricelow)
rm(reg.pricelow)
gc()


#PRICE REGRESSION, VERSION BY TIME FE
print("PRICE <30K REGRESSION, VERSION BY TIME FE")
reg.pricelow<- felm(resale ~ rec_new_t + recalled + age_car_d + difflastsaleyr_d | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.pricelow)
rm(reg.pricelow)
gc() 



#PRICE REGRESSION, VERSION BY TIME + VERSION BY RECALLED FE
print("PRICE <30K REGRESSION, VERSION BY TIME FE")
reg.pricelow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.pricelow)
rm(reg.pricelow)
gc()


Temp<-subset(Panel_all, realprice>=30000)
length(unique(Temp$kenteken))

nrow(Temp) 

#PRICE REGRESSION,  VERSION FE
print("PRICE >=30K REGRESSION,  VERSION FE")
reg.pricehigh<- felm(resale ~ rec_new_t + recalled + date  + age_car_d + difflastsaleyr_d  | id_ver | 0 | id_type_date, data = Temp)
summary(reg.pricehigh)
rm(reg.pricehigh)
gc()

#PRICE REGRESSION, VERSION + VERSION BY RECALLED FE
print("PRICE >=20K REGRESSION, VERSION BY RECALLED FE")
reg.pricehigh<- felm(resale ~ rec_new_t + date  + age_car_d + difflastsaleyr_d | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.pricehigh)
rm(reg.pricehigh)
gc()


#PRICE REGRESSION, VERSION BY TIME FE
print("PRICE >=30K REGRESSION, VERSION BY TIME FE")
reg.pricehigh<- felm(resale ~ rec_new_t + recalled + age_car_d + difflastsaleyr_d | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.pricehigh)
rm(reg.pricehigh)
gc() 



#PRICE REGRESSION, VERSION BY TIME + VERSION BY RECALLED FE
print("PRICE >=30K REGRESSION, VERSION BY TIME FE")
reg.pricehigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.pricehigh)
rm(reg.pricehigh)
gc()

#### TABLE D.3 - RESULTS BY APK INSPECTION DEFECTS, SEPARATE REGRESSIONS, NO LARGE RECALLS ####


rm(list=ls()); gc()
objects()




load("Recall_info_2019.RData")


load("Panel_all_complete.RData")



Largerec<-subset(Recall_info, totaalaantalvoertuigenterugroepactie>=1000000 & nationaalopgegevenaantalvoertuigenterugroepactie>=10000)


#Get which versions have a large recall for some or all their cars
Temp<-subset(Panel_all, select=id_ver,  Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw )
Temp1<-as.character(unique(Temp$id_ver))
#Remove versions with large recalls (including non recalled cars)
Panel_all<-subset(Panel_all, !c(id_ver %in% Temp1)   )

Panel_all$id_ver_rec<-as.factor(paste(Panel_all$typegoedkeuringsnummer, Panel_all$variant, Panel_all$uitvoering, Panel_all$recalled))

length(unique(Panel_all$kenteken))

nrow(Panel_all)





Temp<-subset(Panel_all, Panel_all$APKtot==0 & Panel_all$age_car>=4)
length(unique(Temp$kenteken))
nrow(Temp)

#rm(Panel_all)
#gc()
print("APK 0, BASELINE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.apk0)

print("APK 0, RECALLED-VERSION FE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.apk0)

print("APK 0, TIME-VERSION FE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk0)

print("APK 0, TIME-VERSION + RECALLED-VERSION FE")
reg.apk0<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk0)


Temp<-subset(Panel_all, Panel_all$APKtot>=1 & Panel_all$age_car>=4)
length(unique(Temp$kenteken))
nrow(Temp)

rm(Panel_all)
gc()
print("APK 1, BASELINE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.apk1)

print("APK 1, RECALLED-VERSION FE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.apk1)

print("APK 1, TIME-VERSION FE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk1)

print("APK 1, TIME-VERSION + RECALLED-VERSION FE")
reg.apk1<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.apk1)


#### TABLE D.4 - RESULTS BY BRAND RATING, SEPARATE REGRESSIONS, NO LARGE RECALLS ####


rm(list=ls()); gc()
objects()




load("Recall_info_2019.RData")


load("Panel_all_complete.RData")



Largerec<-subset(Recall_info, totaalaantalvoertuigenterugroepactie>=1000000 & nationaalopgegevenaantalvoertuigenterugroepactie>=10000)




#Get which versions have a large recall for some or all their cars
Temp<-subset(Panel_all, select=id_ver,  Panel_all$referentiecoderdw %in% Largerec$referentiecoderdw )
Temp1<-as.character(unique(Temp$id_ver))
#6485 versions dropped
#Remove versions with large recalls (including non recalled cars)
Panel_all<-subset(Panel_all, !c(id_ver %in% Temp1)   )



Temp<-subset(Panel_all, is.na(Panel_all$rating_merk)==FALSE & Panel_all$rating_merk<7)
nrow(Temp)

length(as.character(unique(Temp$kenteken)))



#Temp1<-subset(Temp, date=="2019-03-01")
#Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
#Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
#nrow(Temp3)
#nrow(Temp3)/nrow(Temp2)


print("RATING BELOW 7, BASELINE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)

print("RATING BELOW 7, RECALLED-VERSION FE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)

print("RATING BELOW 7, TIME-VERSION FE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)

print("RATING BELOW 7, TIME-VERSION + RECALLED-VERSION FE")
reg.ratinglow<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinglow)


Temp<-subset(Panel_all, is.na(Panel_all$rating_merk)==FALSE & Panel_all$rating_merk>=7)
nrow(Temp)

length(as.character(unique(Temp$kenteken)))



#Temp1<-subset(Temp, date=="2019-03-01")
#Temp2<-aggregate(rec_new_t ~ id_ver, data=Temp1, FUN="mean")
#Temp3<-subset(Temp2, rec_new_t>0 & rec_new_t<1)
#nrow(Temp3)
#nrow(Temp3)/nrow(Temp2)



nrow(Temp)
print("RATING 7 AND ABOVE, BASELINE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date + recalled | id_ver | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

print("RATING 7 AND ABOVE, RECALLED-VERSION FE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + date | id_ver_rec | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

print("RATING 7 AND ABOVE, TIME-VERSION FE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d + recalled | id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

print("RATING 7 AND ABOVE, TIME-VERSION + RECALLED-VERSION FE")
reg.ratinghigh<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Temp)
summary(reg.ratinghigh)

#### FIGURE E.1 ####


rm(list=ls()); gc()
objects()

options(scipen=10000)

load("Panel_all_complete.RData")

Recall1<-subset(Panel_all, recall_new=="2017-11-01" & is.na(recall_new)==FALSE & datumeersteafgiftenederland<"2017-11-01" )
Statsrec1<-aggregate(rec_fixed_t ~ date, data=Recall1, FUN=mean)
Statsrec1$dist<-as.numeric(year(Statsrec1$date)*12+month(Statsrec1$date)-year(as.Date("2017-11-01"))*12-month(as.Date("2017-11-01")))
Recall2<-subset(Panel_all, recall_new=="2017-12-01" & is.na(recall_new)==FALSE & datumeersteafgiftenederland<"2017-11-01" & date!="2017-11-01")


Statsrec2<-aggregate(rec_fixed_t ~ date, data=Recall2, FUN=mean)
Statsrec2$dist<-as.numeric(year(Statsrec2$date)*12+month(Statsrec2$date)-year(as.Date("2017-12-01"))*12-month(as.Date("2017-12-01")))
Recall3<-subset(Panel_all, recall_new=="2018-01-01" & is.na(recall_new)==FALSE & datumeersteafgiftenederland<"2017-11-01")
Statsrec3<-aggregate(rec_fixed_t ~ date, data=Recall3, FUN=mean)
Statsrec3$dist<-as.numeric(year(Statsrec3$date)*12+month(Statsrec3$date)-year(as.Date("2018-01-01"))*12-month(as.Date("2018-01-01")))
Statsrec3<-subset(Statsrec3, dist>=0)
Recall4<-subset(Panel_all, recall_new=="2018-02-01" & is.na(recall_new)==FALSE & datumeersteafgiftenederland<"2017-11-01")
Statsrec4<-aggregate(rec_fixed_t ~ date, data=Recall4, FUN=mean)
Statsrec4$dist<-as.numeric(year(Statsrec4$date)*12+month(Statsrec4$date)-year(as.Date("2018-02-01"))*12-month(as.Date("2018-02-01")))
Statsrec4<-subset(Statsrec4, dist>=0)

Statsrec1$group<-as.factor("Nov 2017")
Statsrec2$group<-as.factor("Dec 2017")
Statsrec3$group<-as.factor("Jan 2018")
Statsrec4$group<-as.factor("Feb 2018")

Statsrec<-rbind(Statsrec1, Statsrec2, Statsrec3, Statsrec4 )
factor(Statsrec$group, levels = c("Nov 2017", "Dec 2017", "Jan 2018", "Feb 2018"))

Timetofix<-ggplot()+
  geom_line(data=Statsrec, aes(x=dist, y=rec_fixed_t, linetype=group), size=1)+
  scale_linetype_manual(values=c("solid", "longdash", "dashed", "dotdash"))+
  labs(x="Distance from recall (months)", y="Share of vehicles fixed", linetype="Start date")+
  theme(text=element_text(size=16),plot.title = element_text(hjust = 0.5))
Timetofix


ggsave("Timetofix.pdf", plot=Timetofix, width=8, height=5)


#### FIGURE E.2  - RECALLS AND RESALES OVER TIME, BY REPAIR STATUS####



rm(list=ls()); gc()
objects()



load("Panel_all_complete.RData")

Panel_all<-subset(Panel_all, select=c(resale, rec_new_t, rec_fixed_t, recalled, date, age_car_d, difflastsaleyr_d,
                                      id_ver, id_type_date, id_ver_rec, id_ver_date, rec_precat2, rec_precat3, rec_precat4, rec_postcat1, rec_postcat2, rec_postcat3, rec_postcat4  ))
gc()
Panel_all$recfix_postcat1<-0
Panel_all$recfix_postcat1[Panel_all$rec_postcat1==1 & Panel_all$rec_fixed_t==1]<-1
Panel_all$recfix_postcat2<-0
Panel_all$recfix_postcat2[Panel_all$rec_postcat2==1 & Panel_all$rec_fixed_t==1]<-1
Panel_all$recfix_postcat3<-0
Panel_all$recfix_postcat3[Panel_all$rec_postcat3==1 & Panel_all$rec_fixed_t==1]<-1
Panel_all$recfix_postcat4<-0
Panel_all$recfix_postcat4[Panel_all$rec_postcat4==1 & Panel_all$rec_fixed_t==1]<-1

Panel_all$recnofix_postcat1<-0
Panel_all$recnofix_postcat1[Panel_all$rec_postcat1==1 & Panel_all$rec_fixed_t==0]<-1
Panel_all$recnofix_postcat2<-0
Panel_all$recnofix_postcat2[Panel_all$rec_postcat2==1 & Panel_all$rec_fixed_t==0]<-1
Panel_all$recnofix_postcat3<-0
Panel_all$recnofix_postcat3[Panel_all$rec_postcat3==1 & Panel_all$rec_fixed_t==0]<-1
Panel_all$recnofix_postcat4<-0
Panel_all$recnofix_postcat4[Panel_all$rec_postcat4==1 & Panel_all$rec_fixed_t==0]<-1




# #PRE-TREND TEST, VERSION FE
reg.pretrend1<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + recnofix_postcat1 + recnofix_postcat2 + recnofix_postcat3 + recnofix_postcat4 + recfix_postcat1 + recfix_postcat2 + recfix_postcat3 +recfix_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend1)

#Put in graph
conf95_1<-confint(reg.pretrend1, level=0.95)
conf95pre<-subset(conf95_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf95nofix<-subset(conf95_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf95fix<-subset(conf95_1, str_detect(row.names(conf95_1),"recfix.*?"))
conf90_1<-confint(reg.pretrend1, level=0.90)
conf90pre<-subset(conf90_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf90nofix<-subset(conf90_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf90fix<-subset(conf90_1, str_detect(row.names(conf95_1),"recfix.*?"))
coef_1<-coef(reg.pretrend1)
coefpre<-subset(coef_1, str_detect(names(coef_1),"rec_pre.*?"))
coefnofix<-subset(coef_1, str_detect(names(coef_1),"recnofix.*?"))
coeffix<-subset(coef_1, str_detect(names(coef_1),"recfix.*?"))
graphpre<-cbind(names(coefpre),coefpre, conf95pre, conf90pre)
graphnofix<-cbind(names(coefnofix),coefnofix, conf95nofix, conf90nofix)
graphfix<-cbind(names(coeffix),coeffix, conf95fix, conf90fix)
graphpre<-rbind(graphpre, c("rec_precat1", 0, 0, 0, 0, 0))
graphpre<-cbind(graphpre, c(-2,-3,-4,-1))
graphnofix<-cbind(graphnofix, c(0,1,2,3))
graphfix<-cbind(graphfix, c(0,1,2,3))
colnames(graphpre)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphnofix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphfix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graphpre<-as.data.frame(graphpre,stringsAsFactors=FALSE)
graphnofix<-as.data.frame(graphnofix,stringsAsFactors=FALSE)
graphfix<-as.data.frame(graphfix,stringsAsFactors=FALSE)

graphpre$coef<-as.numeric(as.character(graphpre$coef))
graphpre$conf95min<-as.numeric(as.character(graphpre$conf95min))
graphpre$conf90min<-as.numeric(as.character(graphpre$conf90min))
graphpre$conf95max<-as.numeric(as.character(graphpre$conf95max))
graphpre$conf90max<-as.numeric(as.character(graphpre$conf90max))
graphpre$pos<-as.numeric(as.character(graphpre$pos))

graphnofix$coef<-as.numeric(as.character(graphnofix$coef))
graphnofix$conf95min<-as.numeric(as.character(graphnofix$conf95min))
graphnofix$conf90min<-as.numeric(as.character(graphnofix$conf90min))
graphnofix$conf95max<-as.numeric(as.character(graphnofix$conf95max))
graphnofix$conf90max<-as.numeric(as.character(graphnofix$conf90max))
graphnofix$pos<-as.numeric(as.character(graphnofix$pos))

graphfix$coef<-as.numeric(as.character(graphfix$coef))
graphfix$conf95min<-as.numeric(as.character(graphfix$conf95min))
graphfix$conf90min<-as.numeric(as.character(graphfix$conf90min))
graphfix$conf95max<-as.numeric(as.character(graphfix$conf95max))
graphfix$conf90max<-as.numeric(as.character(graphfix$conf90max))
graphfix$pos<-as.numeric(as.character(graphfix$pos))

graph<-rbind(graphnofix, graphpre)
graph$fix<-"No"
graphfix$fix<-"Yes"
graph<-rbind(graph, graphfix)

graphfix1<-graph

save(graphfix1, file="Graphfix1.RData")
rm(reg.pretrend1)
gc()

print("PRE-TREND TEST, VERSION BY RECALL STATUS FE")
reg.pretrend2<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + recnofix_postcat1 + recnofix_postcat2 + recnofix_postcat3 + recnofix_postcat4 + recfix_postcat1 + recfix_postcat2 + recfix_postcat3 +recfix_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_rec | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend2)



#Put in graph
conf95_1<-confint(reg.pretrend2, level=0.95)
conf95pre<-subset(conf95_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf95nofix<-subset(conf95_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf95fix<-subset(conf95_1, str_detect(row.names(conf95_1),"recfix.*?"))
conf90_1<-confint(reg.pretrend2, level=0.90)
conf90pre<-subset(conf90_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf90nofix<-subset(conf90_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf90fix<-subset(conf90_1, str_detect(row.names(conf95_1),"recfix.*?"))
coef_1<-coef(reg.pretrend2)
coefpre<-subset(coef_1, str_detect(names(coef_1),"rec_pre.*?"))
coefnofix<-subset(coef_1, str_detect(names(coef_1),"recnofix.*?"))
coeffix<-subset(coef_1, str_detect(names(coef_1),"recfix.*?"))
graphpre<-cbind(names(coefpre),coefpre, conf95pre, conf90pre)
graphnofix<-cbind(names(coefnofix),coefnofix, conf95nofix, conf90nofix)
graphfix<-cbind(names(coeffix),coeffix, conf95fix, conf90fix)
graphpre<-rbind(graphpre, c("rec_precat1", 0, 0, 0, 0, 0))
graphpre<-cbind(graphpre, c(-2,-3,-4,-1))
graphnofix<-cbind(graphnofix, c(0,1,2,3))
graphfix<-cbind(graphfix, c(0,1,2,3))
colnames(graphpre)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphnofix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphfix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graphpre<-as.data.frame(graphpre,stringsAsFactors=FALSE)
graphnofix<-as.data.frame(graphnofix,stringsAsFactors=FALSE)
graphfix<-as.data.frame(graphfix,stringsAsFactors=FALSE)

graphpre$coef<-as.numeric(as.character(graphpre$coef))
graphpre$conf95min<-as.numeric(as.character(graphpre$conf95min))
graphpre$conf90min<-as.numeric(as.character(graphpre$conf90min))
graphpre$conf95max<-as.numeric(as.character(graphpre$conf95max))
graphpre$conf90max<-as.numeric(as.character(graphpre$conf90max))
graphpre$pos<-as.numeric(as.character(graphpre$pos))

graphnofix$coef<-as.numeric(as.character(graphnofix$coef))
graphnofix$conf95min<-as.numeric(as.character(graphnofix$conf95min))
graphnofix$conf90min<-as.numeric(as.character(graphnofix$conf90min))
graphnofix$conf95max<-as.numeric(as.character(graphnofix$conf95max))
graphnofix$conf90max<-as.numeric(as.character(graphnofix$conf90max))
graphnofix$pos<-as.numeric(as.character(graphnofix$pos))

graphfix$coef<-as.numeric(as.character(graphfix$coef))
graphfix$conf95min<-as.numeric(as.character(graphfix$conf95min))
graphfix$conf90min<-as.numeric(as.character(graphfix$conf90min))
graphfix$conf95max<-as.numeric(as.character(graphfix$conf95max))
graphfix$conf90max<-as.numeric(as.character(graphfix$conf90max))
graphfix$pos<-as.numeric(as.character(graphfix$pos))

graph<-rbind(graphnofix, graphpre)
graph$fix<-"No"
graphfix$fix<-"Yes"
graph<-rbind(graph, graphfix)

graphfix2<-graph

save(graphfix2, file="Graphfix2.RData")
rm(reg.pretrend2)
gc()

print("PRE-TREND TEST, VERSION BY TIME-VERSION STATUS FE")
reg.pretrend3<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + recnofix_postcat1 + recnofix_postcat2 + recnofix_postcat3 + recnofix_postcat4+ recfix_postcat1 + recfix_postcat2 + recfix_postcat3 +recfix_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend3)

#Put in graph
conf95_1<-confint(reg.pretrend3, level=0.95)
conf95pre<-subset(conf95_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf95nofix<-subset(conf95_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf95fix<-subset(conf95_1, str_detect(row.names(conf95_1),"recfix.*?"))
conf90_1<-confint(reg.pretrend3, level=0.90)
conf90pre<-subset(conf90_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf90nofix<-subset(conf90_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf90fix<-subset(conf90_1, str_detect(row.names(conf95_1),"recfix.*?"))
coef_1<-coef(reg.pretrend3)
coefpre<-subset(coef_1, str_detect(names(coef_1),"rec_pre.*?"))
coefnofix<-subset(coef_1, str_detect(names(coef_1),"recnofix.*?"))
coeffix<-subset(coef_1, str_detect(names(coef_1),"recfix.*?"))
graphpre<-cbind(names(coefpre),coefpre, conf95pre, conf90pre)
graphnofix<-cbind(names(coefnofix),coefnofix, conf95nofix, conf90nofix)
graphfix<-cbind(names(coeffix),coeffix, conf95fix, conf90fix)
graphpre<-rbind(graphpre, c("rec_precat1", 0, 0, 0, 0, 0))
graphpre<-cbind(graphpre, c(-2,-3,-4,-1))
graphnofix<-cbind(graphnofix, c(0,1,2,3))
graphfix<-cbind(graphfix, c(0,1,2,3))
colnames(graphpre)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphnofix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphfix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graphpre<-as.data.frame(graphpre,stringsAsFactors=FALSE)
graphnofix<-as.data.frame(graphnofix,stringsAsFactors=FALSE)
graphfix<-as.data.frame(graphfix,stringsAsFactors=FALSE)

graphpre$coef<-as.numeric(as.character(graphpre$coef))
graphpre$conf95min<-as.numeric(as.character(graphpre$conf95min))
graphpre$conf90min<-as.numeric(as.character(graphpre$conf90min))
graphpre$conf95max<-as.numeric(as.character(graphpre$conf95max))
graphpre$conf90max<-as.numeric(as.character(graphpre$conf90max))
graphpre$pos<-as.numeric(as.character(graphpre$pos))

graphnofix$coef<-as.numeric(as.character(graphnofix$coef))
graphnofix$conf95min<-as.numeric(as.character(graphnofix$conf95min))
graphnofix$conf90min<-as.numeric(as.character(graphnofix$conf90min))
graphnofix$conf95max<-as.numeric(as.character(graphnofix$conf95max))
graphnofix$conf90max<-as.numeric(as.character(graphnofix$conf90max))
graphnofix$pos<-as.numeric(as.character(graphnofix$pos))

graphfix$coef<-as.numeric(as.character(graphfix$coef))
graphfix$conf95min<-as.numeric(as.character(graphfix$conf95min))
graphfix$conf90min<-as.numeric(as.character(graphfix$conf90min))
graphfix$conf95max<-as.numeric(as.character(graphfix$conf95max))
graphfix$conf90max<-as.numeric(as.character(graphfix$conf90max))
graphfix$pos<-as.numeric(as.character(graphfix$pos))

graph<-rbind(graphnofix, graphpre)
graph$fix<-"No"
graphfix$fix<-"Yes"
graph<-rbind(graph, graphfix)

graphfix3<-graph

save(graphfix3, file="Graphfix3.RData")
rm(reg.pretrend3)
gc()


reg.pretrend4<- felm(resale ~ rec_precat2 + rec_precat3 + rec_precat4 + recnofix_postcat1 + recnofix_postcat2 + recnofix_postcat3 + recnofix_postcat4+ recfix_postcat1 + recfix_postcat2 + recfix_postcat3 +recfix_postcat4 + recalled + date + age_car_d + difflastsaleyr_d| id_ver_rec + id_ver_date | 0 | id_type_date, data = Panel_all)
summary(reg.pretrend4)


#Put in graph
conf95_1<-confint(reg.pretrend4, level=0.95)
conf95pre<-subset(conf95_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf95nofix<-subset(conf95_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf95fix<-subset(conf95_1, str_detect(row.names(conf95_1),"recfix.*?"))
conf90_1<-confint(reg.pretrend4, level=0.90)
conf90pre<-subset(conf90_1, str_detect(row.names(conf95_1),"rec_pre.*?"))
conf90nofix<-subset(conf90_1, str_detect(row.names(conf95_1),"recnofix.*?"))
conf90fix<-subset(conf90_1, str_detect(row.names(conf95_1),"recfix.*?"))
coef_1<-coef(reg.pretrend4)
coefpre<-subset(coef_1, str_detect(names(coef_1),"rec_pre.*?"))
coefnofix<-subset(coef_1, str_detect(names(coef_1),"recnofix.*?"))
coeffix<-subset(coef_1, str_detect(names(coef_1),"recfix.*?"))
graphpre<-cbind(names(coefpre),coefpre, conf95pre, conf90pre)
graphnofix<-cbind(names(coefnofix),coefnofix, conf95nofix, conf90nofix)
graphfix<-cbind(names(coeffix),coeffix, conf95fix, conf90fix)
graphpre<-rbind(graphpre, c("rec_precat1", 0, 0, 0, 0, 0))
graphpre<-cbind(graphpre, c(-2,-3,-4,-1))
graphnofix<-cbind(graphnofix, c(0,1,2,3))
graphfix<-cbind(graphfix, c(0,1,2,3))
colnames(graphpre)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphnofix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
colnames(graphfix)<-c("name", "coef", "conf95min",  "conf95max","conf90min",  "conf90max", "pos")
graphpre<-as.data.frame(graphpre,stringsAsFactors=FALSE)
graphnofix<-as.data.frame(graphnofix,stringsAsFactors=FALSE)
graphfix<-as.data.frame(graphfix,stringsAsFactors=FALSE)

graphpre$coef<-as.numeric(as.character(graphpre$coef))
graphpre$conf95min<-as.numeric(as.character(graphpre$conf95min))
graphpre$conf90min<-as.numeric(as.character(graphpre$conf90min))
graphpre$conf95max<-as.numeric(as.character(graphpre$conf95max))
graphpre$conf90max<-as.numeric(as.character(graphpre$conf90max))
graphpre$pos<-as.numeric(as.character(graphpre$pos))

graphnofix$coef<-as.numeric(as.character(graphnofix$coef))
graphnofix$conf95min<-as.numeric(as.character(graphnofix$conf95min))
graphnofix$conf90min<-as.numeric(as.character(graphnofix$conf90min))
graphnofix$conf95max<-as.numeric(as.character(graphnofix$conf95max))
graphnofix$conf90max<-as.numeric(as.character(graphnofix$conf90max))
graphnofix$pos<-as.numeric(as.character(graphnofix$pos))

graphfix$coef<-as.numeric(as.character(graphfix$coef))
graphfix$conf95min<-as.numeric(as.character(graphfix$conf95min))
graphfix$conf90min<-as.numeric(as.character(graphfix$conf90min))
graphfix$conf95max<-as.numeric(as.character(graphfix$conf95max))
graphfix$conf90max<-as.numeric(as.character(graphfix$conf90max))
graphfix$pos<-as.numeric(as.character(graphfix$pos))

graph<-rbind(graphnofix, graphpre)
graph$fix<-"No"
graphfix$fix<-"Yes"
graph<-rbind(graph, graphfix)

graphfix4<-graph

save(graphfix4, file="Graphfix4.RData")
rm(reg.pretrend4)
gc()




max95<-ceiling(1000*max(rbind(graphfix1$conf95max, graphfix2$conf95max, graphfix3$conf95max, graphfix4$conf95max)))/1000
min95<-floor(1000*min(rbind(graphfix1$conf95min, graphfix2$conf95min, graphfix3$conf95min, graphfix4$conf95min)))/1000

graphfix1a<-rbind(graphfix1, c("rec_precat1", 0, 0, 0,0, 0, -1, "Yes"))
graphfix1a$coef<-as.numeric(as.character(graphfix1a$coef))
graphfix1a$conf95min<-as.numeric(as.character(graphfix1a$conf95min))
graphfix1a$conf90min<-as.numeric(as.character(graphfix1a$conf90min))
graphfix1a$conf95max<-as.numeric(as.character(graphfix1a$conf95max))
graphfix1a$conf90max<-as.numeric(as.character(graphfix1a$conf90max))
graphfix1a$pos<-as.numeric(as.character(graphfix1a$pos))
plotfix1<-ggplot(graphfix1a, aes(x=pos, y=coef), group=fix) +
  geom_point(aes(shape=fix), size=4) +
  geom_ribbon(aes(ymin=conf95min, ymax=conf95max, linetype=fix), colour="black",  alpha=0.1)+
  geom_hline(yintercept = 0, size=0.5)+
  labs(shape = "Repaired", linetype = "Repaired")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(min95, max95) +
  ggtitle("Version FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
plotfix1

graphfix2a<-rbind(graphfix2, c("rec_precat1", 0, 0, 0,0, 0, -1, "Yes"))
graphfix2a$coef<-as.numeric(as.character(graphfix2a$coef))
graphfix2a$conf95min<-as.numeric(as.character(graphfix2a$conf95min))
graphfix2a$conf90min<-as.numeric(as.character(graphfix2a$conf90min))
graphfix2a$conf95max<-as.numeric(as.character(graphfix2a$conf95max))
graphfix2a$conf90max<-as.numeric(as.character(graphfix2a$conf90max))
graphfix2a$pos<-as.numeric(as.character(graphfix2a$pos))
plotfix2<-ggplot(graphfix2a, aes(x=pos, y=coef), group=fix) +
  geom_point(aes(shape=fix), size=4) +
  geom_ribbon(aes(ymin=conf95min, ymax=conf95max, linetype=fix), colour="black",  alpha=0.1)+
  geom_hline(yintercept = 0, size=0.5)+
  labs(shape = "Repaired", linetype = "Repaired")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(min95, max95) +
  ggtitle("Version-Recall FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
plotfix2

graphfix3a<-rbind(graphfix3, c("rec_precat1", 0, 0, 0,0, 0, -1, "Yes"))
graphfix3a$coef<-as.numeric(as.character(graphfix3a$coef))
graphfix3a$conf95min<-as.numeric(as.character(graphfix3a$conf95min))
graphfix3a$conf90min<-as.numeric(as.character(graphfix3a$conf90min))
graphfix3a$conf95max<-as.numeric(as.character(graphfix3a$conf95max))
graphfix3a$conf90max<-as.numeric(as.character(graphfix3a$conf90max))
graphfix3a$pos<-as.numeric(as.character(graphfix3a$pos))
plotfix3<-ggplot(graphfix3a, aes(x=pos, y=coef), group=fix) +
  geom_point(aes(shape=fix), size=4) +
  geom_ribbon(aes(ymin=conf95min, ymax=conf95max, linetype=fix), colour="black",  alpha=0.1)+
  geom_hline(yintercept = 0, size=0.5)+
  labs(shape = "Repaired", linetype = "Repaired")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(min95, max95) +
  ggtitle("Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
plotfix3

graphfix4a<-rbind(graphfix4, c("rec_precat1", 0, 0, 0,0, 0, -1, "Yes"))
graphfix4a$coef<-as.numeric(as.character(graphfix4a$coef))
graphfix4a$conf95min<-as.numeric(as.character(graphfix4a$conf95min))
graphfix4a$conf90min<-as.numeric(as.character(graphfix4a$conf90min))
graphfix4a$conf95max<-as.numeric(as.character(graphfix4a$conf95max))
graphfix4a$conf90max<-as.numeric(as.character(graphfix4a$conf90max))
graphfix4a$pos<-as.numeric(as.character(graphfix4a$pos))
plotfix4<-ggplot(graphfix4a, aes(x=pos, y=coef), group=fix) +
  geom_point(aes(shape=fix), size=4) +
  geom_ribbon(aes(ymin=conf95min, ymax=conf95max, linetype=fix), colour="black",  alpha=0.1)+
  geom_hline(yintercept = 0, size=0.5)+
  labs(shape = "Repaired", linetype = "Repaired")+
  xlab("Time to recall (3 months)") +
  ylab("Coefficient estimates") +
  ylim(min95, max95) +
  ggtitle("Version-Recall, Version-Time FE") +
  scale_x_continuous(breaks=c(-4,-3,-2,-1,0,1,2,3), labels=c(expression(""<=4),-3,-2,-1,0,1,2,expression("">=3)))+
  theme(text=element_text(size=14),plot.title = element_text(hjust = 0.5))
plotfix4


paralleltestfix<-grid.arrange(plotfix1, plotfix2,plotfix3, plotfix4 , ncol = 2, nrow = 2)
paralleltestfix
ggsave("Paralleltest_fix.pdf", plot=paralleltestfix, width=9, height=5)



####APPENDIX F FIGURES AND TABLES   #####

rm(list=ls()); gc()
objects()



load("Panel_all_complete.RData")
load("Recall_info_2019.RData")

Temp<-subset(Panel_all, is.na(referentiecoderdw)==FALSE)

Temp2<-merge(Temp, Recall_info, by="referentiecoderdw", all.x=TRUE, all.y=FALSE)



#FIGURE F.1 - SHARE OF RECALLS BY VEHICLE PART, PER SALE PRICE GROUP
#length(unique(Temp2$kenteken[is.na(Temp2$realprice)==FALSE]))

Tempprice<-aggregate(realprice ~ kenteken + rec_part, data=subset(Temp2, is.na(realprice)==FALSE), FUN="mean")

Tempprice$priceg<-""
Tempprice$priceg[Tempprice$realprice<30000]<-"less than 30K"
Tempprice$priceg[Tempprice$realprice>=30000]<-"30K or more"

Tempprice$num<-1
Sumprice<-aggregate(num ~ priceg + rec_part, data=Tempprice, FUN="sum")
Totprice<-aggregate(num ~ priceg, data=Tempprice, FUN="sum")
Totprice$tot<-Totprice$num
Totprice$num<-NULL
Statprice<-merge(Totprice, Sumprice, by="priceg")
Statprice$share<-Statprice$num/Statprice$tot


Priceplot<-ggplot(Statprice, aes(x=rec_part, y=share, group=priceg))+
  geom_bar(stat="identity",position="dodge", aes(fill=priceg))+
  ylim(0,0.35)+
  labs(fill="Listed price")+
  xlab("Part recalled")+
  ylab("Share vehicles")+
  scale_fill_grey()+
  theme(axis.text.x = element_text(angle = 90, vjust=0.5), text=element_text(size=16))
ggsave("Share_part_price.pdf", plot=Priceplot, width=8, height=5)
Priceplot


#FIGURE F.2 - SHARE OF RECALLS BY VEHICLE PART, PER DEFECT NUMBER GROUP
#length(unique(Temp2$kenteken[Temp2$age_car>=4]))

Tempapk<-aggregate(APKtot ~ kenteken + rec_part, data=subset(Temp2, age_car>=4), FUN="mean")

Tempapk$apkg<-""
Tempapk$apkg[Tempapk$APKtot==0]<-"0"
Tempapk$apkg[Tempapk$APKtot>0]<-"1 or more"

Tempapk$num<-1
Sumapk<-aggregate(num ~ apkg + rec_part, data=Tempapk, FUN="sum")
Totapk<-aggregate(num ~ apkg, data=Tempapk, FUN="sum")
Totapk$tot<-Totapk$num
Totapk$num<-NULL
Statapk<-merge(Totapk, Sumapk, by="apkg")
Statapk$share<-Statapk$num/Statapk$tot

Apkplot<-ggplot(Statapk, aes(x=rec_part, y=share, group=apkg))+
  geom_bar(stat="identity",position="dodge", aes(fill=apkg))+
  ylim(0,0.35)+
  labs(fill="APK defects")+
  xlab("Part recalled")+
  ylab("Share vehicles")+
  scale_fill_grey()+
  theme(axis.text.x = element_text(angle = 90, vjust=0.5), text=element_text(size=16))
ggsave("Share_part_apk.pdf", plot=Apkplot, width=8, height=5)
Apkplot

#FIGURE F.3 - SHARE OF RECALLS BY VEHICLE PART, PER BRAND RATING GROUP
length(unique(Temp2$kenteken[is.na(Temp2$rating_merk)==FALSE]))

Temprating<-aggregate(rating_merk ~ kenteken + rec_part, data=subset(Temp2, is.na(rating_merk)==FALSE), FUN="mean")

Temprating$ratingg<-""
Temprating$ratingg[Temprating$rating_merk<7]<-"Less than 7"
Temprating$ratingg[Temprating$rating_merk>=7]<-"7 or more"

Temprating$num<-1
Sumrating<-aggregate(num ~ ratingg + rec_part, data=Temprating, FUN="sum")
Totrating<-aggregate(num ~ ratingg, data=Temprating, FUN="sum")
Totrating$tot<-Totrating$num
Totrating$num<-NULL
Statrating<-merge(Totrating, Sumrating, by="ratingg")
Statrating$share<-Statrating$num/Statrating$tot

Ratingplot<-ggplot(Statrating, aes(x=rec_part, y=share, group=ratingg))+
  geom_bar(stat="identity",position="dodge", aes(fill=ratingg))+
  ylim(0,0.35)+
  labs(fill="Brand rating")+
  xlab("Part recalled")+
  ylab("Share vehicles")+
  scale_fill_grey()+
  theme(axis.text.x = element_text(angle = 90, vjust=0.5), text=element_text(size=16))
Ratingplot
ggsave("Share_part_rating.pdf", plot=Ratingplot, width=8, height=5)

#TABLE F.1 - HETEROGENEITY REGRESSIONS BY TYPE OF PART RECALLED

part_ver<-unique(subset(Temp2, select=c(rec_part, id_ver) ))
#NOT CORE FUNCTIONING
nononcore_ver<-subset(part_ver, rec_part!="Approval invalid" & rec_part!="Bodywork" & rec_part!="Doors" & rec_part!="General construction" & rec_part!="Lights" & rec_part!="Seats" & rec_part!="Suspensions" & rec_part!="Trailer connection" & rec_part!="Windows")
#Remove all the cars in versions with fundamental part recalls
Temp3<-subset(Panel_all, !c(id_ver %in% nononcore_ver$id_ver ))
rm(nononcore_ver)

Temp4<-subset(Temp3, is.na(realprice)==FALSE & realprice<30000)

print("PRICE <30K REGRESSION, VERSION BY TIME FE")
reg.notfund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.notfund)
rm(reg.notfund)
gc()

Temp4<-subset(Temp3, is.na(realprice)==FALSE & realprice>=30000)

print("PRICE >=30K REGRESSION, VERSION BY TIME FE")
reg.notfund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.notfund)
rm(reg.notfund)
gc()

Temp4<-subset(Temp3, age_car>=4 & APKtot==0)
print("APK 0, TIME-VERSION + RECALLED-VERSION FE")
reg.notfund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.notfund)
rm(reg.notfund)
gc()

Temp4<-subset(Temp3, age_car>=4 & APKtot>=1)
print("APK 1, TIME-VERSION + RECALLED-VERSION FE")
reg.notfund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.notfund)
rm(reg.notfund)
gc()

Temp4<-subset(Temp3, is.na(rating_merk)==FALSE & rating_merk<7)
print("RATING <7, TIME-VERSION + RECALLED-VERSION FE")
reg.notfund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.notfund)
rm(reg.notfund)
gc()

Temp4<-subset(Temp3, is.na(rating_merk)==FALSE & rating_merk>=7)
print("RATING >=7, TIME-VERSION + RECALLED-VERSION FE")
reg.notfund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.notfund)
rm(reg.notfund)
gc()


#CORE FUNCTIONING
nocore_ver<-subset(part_ver, rec_part!="Protection" & rec_part!="Axles wheels" & rec_part!="Braking" & rec_part!="Electric" & rec_part!="Engine" & rec_part!="Motor part" & rec_part!="Steering" & rec_part!="Transmission")
#Remove all the cars in versions with recall except Protection
Temp3<-subset(Panel_all, !c(id_ver %in% nocore_ver$id_ver ))
rm(nocore_ver)

Temp4<-subset(Temp3, is.na(realprice)==FALSE & realprice<30000)

print("PRICE <30K REGRESSION, VERSION BY TIME FE")
reg.fund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.fund)
rm(reg.fund)
gc()

Temp4<-subset(Temp3, is.na(realprice)==FALSE & realprice>=30000)

print("PRICE >=30K REGRESSION, VERSION BY TIME FE")
reg.fund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.fund)
rm(reg.fund)
gc()

Temp4<-subset(Temp3, age_car>=4 & APKtot==0)
print("APK 0, TIME-VERSION + RECALLED-VERSION FE")
reg.fund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.fund)
rm(reg.fund)
gc()

Temp4<-subset(Temp3, age_car>=4 & APKtot>=1)
print("APK 1, TIME-VERSION + RECALLED-VERSION FE")
reg.fund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.fund)
rm(reg.fund)
gc()

Temp4<-subset(Temp3, is.na(rating_merk)==FALSE & rating_merk<7)
print("RATING <7, TIME-VERSION + RECALLED-VERSION FE")
reg.fund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.fund)
rm(reg.fund)
gc()

Temp4<-subset(Temp3, is.na(rating_merk)==FALSE & rating_merk>=7)
print("RATING >=7, TIME-VERSION + RECALLED-VERSION FE")
reg.fund<- felm(resale ~ rec_new_t  + age_car_d + difflastsaleyr_d | id_ver_date + id_ver_rec | 0 | id_type_date, data = Temp4)
summary(reg.fund)
rm(reg.fund)
gc()


#### APPENDIX G TABLES ####
#NOTE: For this code is necessary the asking price data, which are not publicily available. Please see the documentation.

rm(list=ls()); gc()
objects()




filedate_price<-str_match(list.files( "Data - (.*?).RData" ))[,2]
Files_price<-cbind.data.frame(filenames_price,as.Date(dmy(filedate_price)),stringsAsFactors=FALSE)
colnames(Files_price)<-c("path", "date")
Files_price<- Files_price[order(Files_price$date),]
Files_price$order<-order(Files_price$date)

rm(filedate_price, filenames_price)

#LOAD STARTING FILE (first)# 
load(Files_price$path[1])


#Remove first row, which is always empty
Data<-Data[-1,]


#Remove control checks
Data$comb<-NULL
Data$page<-NULL
Data$num<-NULL
Data$maxpag<-NULL

#Remove duplicate observations
Data<-subset(Data, duplicated(Data)==FALSE)

#Add date
Data$date<-Files_price$date[1]




#Replace with NA when plate not shown
Data$kenteken[Data$kenteken=="false"]<-NA
#Plate name always in uppercase
Data$kenteken<-toupper(Data$kenteken)
#Remove special characters (for instance, "-" between characters in licence plate)
Data$kenteken<-gsub("[[:punct:]]", "", Data$kenteken)

#Valid kenteken: 1) at least one number 2) at least one letter 3) length==6
Data$length<-str_length(Data$kenteken)==6
Data$numbers<-grepl("[[:digit:]]", Data$kenteken)
Data$letters<-grepl("[[:alpha:]]", Data$kenteken)
Data$valid<- Data$length==TRUE & Data$numbers==TRUE & Data$letters==TRUE
table(Data$valid, useNA="always")/nrow(Data)
#About 85% seemingly valid plates
Data$length<-NULL
Data$numbers<-NULL
Data$letters<-NULL

#Rename file
Sales<-Data
rm(Data)



#APPEND OTHER VEHICLES
for(j in 2:nrow(Files_price)) {
  
  load(Files_price$path[j])
  
  
  #Remove first row, which is always empty
  Data<-Data[-1,]
  
  #Remove control checks
  Data$comb<-NULL
  Data$page<-NULL
  Data$num<-NULL
  Data$maxpag<-NULL
  
  table(duplicated(Data))
  #Remove duplicate observations
  Data<-subset(Data, duplicated(Data)==FALSE)
  
  #Add date
  Data$date<-Files_price$date[j]
  
  
  #Replace with NA when plate not shown
  Data$kenteken[Data$kenteken=="false"]<-NA
  #Plate name always in uppercase
  Data$kenteken<-toupper(Data$kenteken)
  #Remove special characters (for instance, "-" between characters in licence plate)
  Data$kenteken<-gsub("[[:punct:]]", "", Data$kenteken)
  
  #Valid kenteken: 1) at least one number 2) at least one letter 3) length==6
  Data$length<-str_length(Data$kenteken)==6
  Data$numbers<-grepl("[[:digit:]]", Data$kenteken)
  Data$letters<-grepl("[[:alpha:]]", Data$kenteken)
  Data$valid<- Data$length==TRUE & Data$numbers==TRUE & Data$letters==TRUE
  table(Data$valid, useNA="always")/nrow(Data)
  #About 85% seemingly valid plates
  Data$length<-NULL
  Data$numbers<-NULL
  Data$letters<-NULL
  
  
  
  
  
  #Some cars captured in one round were also captured in the previous round. Keep only the earliest data
  kenteken1<-subset(Sales$kenteken, Sales$date==Files_price$date[(j-1)] & Sales$valid==TRUE)
  Data<-subset(Data, !c(kenteken %in% kenteken1))
  rm(kenteken1)
  
  Sales<-rbind(Sales, Data)
  rm(Data)
}

#Tot 1407405 obs

#Remove plates with only digits and less than 6 letters
Sales$kenteken[str_detect(Sales$kenteken, "[:digit:]{6}")]<-NA
Sales$kenteken[str_detect(Sales$kenteken, "^[:alnum:]{1,5}$")]<-NA


#Tot 1407405 obs

table(is.na(Sales$kenteken))/nrow(Sales)
#86.08% Valid


Sales<-subset(Sales, date<as.Date("2019-04-01"))
table(is.na(Sales$kenteken))/nrow(Sales)
#86.19% Valid
#775910 obs total (valid + invalid)

#Drop invalid observations (no kenteken)
Sales<-subset(Sales, is.na(kenteken)==FALSE)
#668815 obs

Sales$month<-month(Sales$date)
Sales$year<-year(Sales$date)

load("Data/Source data/Recalls_merged_2019.RData")

#Merge with recalls data
Sales<-merge(Sales, Data, by="kenteken", all.x=TRUE, all.y=FALSE)
#Remove if no type approval info
Sales<-subset(Sales, is.na(typegoedkeuringsnummer)==FALSE & typegoedkeuringsnummer!="")
Sales<-subset(Sales, valid==TRUE)
#518279 obs

rm(Data)

load("Link_Char.RData")
Link_Char<-subset(Link_Char, select=c(kenteken, catalogusprijs, merk))

Sales<-merge(Sales, Link_Char, by="kenteken", all.x=TRUE, all.y=FALSE)
rm(Link_Char)
gc()


#Rating brand
Sales$rating_merk<-NA
i<- str_detect(Sales$merk, "SUZUKI")
Sales$rating_merk[i]<-8.7
i<- str_detect(Sales$merk, "HONDA")
Sales$rating_merk[i]<-8.5
i<- str_detect(Sales$merk, "TOYOTA")
Sales$rating_merk[i]<-8.3
i<- str_detect(Sales$merk, "HYUNDAI")
Sales$rating_merk[i]<-8.1
i<- str_detect(Sales$merk, "MITSUBISHI")
Sales$rating_merk[i]<-8.1
i<- str_detect(Sales$merk, "KIA")
Sales$rating_merk[i]<-8.0
i<- str_detect(Sales$merk, "MAZDA")
Sales$rating_merk[i]<-7.7
i<- str_detect(Sales$merk, "DAIHATSU")
Sales$rating_merk[i]<-7.6
i<- str_detect(Sales$merk, "NISSAN")
Sales$rating_merk[i]<-7.6
i<- str_detect(Sales$merk, "DACIA")
Sales$rating_merk[i]<-7.4
i<- str_detect(Sales$merk, "MERCEDES")
Sales$rating_merk[i]<-7.4
i<- str_detect(Sales$merk, "FORD")
Sales$rating_merk[i]<-7.2
i<- str_detect(Sales$merk, "AUDI")
Sales$rating_merk[i]<-7.1
i<- str_detect(Sales$merk, "SUBARU")
Sales$rating_merk[i]<-7.1
i<- str_detect(Sales$merk, "FIAT")
Sales$rating_merk[i]<-7.0
i<- str_detect(Sales$merk, "BMW")
Sales$rating_merk[i]<-6.9
i<- str_detect(Sales$merk, "RENAULT")
Sales$rating_merk[i]<-6.7
i<- str_detect(Sales$merk, "OPEL")
Sales$rating_merk[i]<-6.6
i<- str_detect(Sales$merk, "VOLVO")
Sales$rating_merk[i]<-6.4
i<- str_detect(Sales$merk, "SKODA")
Sales$rating_merk[i]<-6.1
i<- str_detect(Sales$merk, "CITRO")
Sales$rating_merk[i]<-5.9
i<- str_detect(Sales$merk, "VOLKSWAGEN")
Sales$rating_merk[i]<-5.9
i<- str_detect(Sales$merk, "SEAT")
Sales$rating_merk[i]<-5.8
i<- str_detect(Sales$merk, "PEUGEOT")
Sales$rating_merk[i]<-5.5

load("Data/Source data/HCPI.RData")
HCPI$date<-as.Date(HCPI$date, format="%m/%d/%Y")
HCPI$month<-month(HCPI$date)
HCPI$year<-year(HCPI$date)
HCPI$date<-NULL
Sales$month<-month(Sales$date)
Sales$year<-year(Sales$date)
Sales<-merge(Sales, HCPI, by=c("month", "year"), all.x=TRUE, all.y=FALSE)
Sales$HCPI_res<-Sales$hcpi
Sales$hcpi<-NULL
Sales$month<-NULL
Sales$year<-NULL

Sales$month<-month(Sales$datumeerstetoelating)
Sales$year<-year(Sales$datumeerstetoelating)
Sales<-merge(Sales, HCPI, by=c("month", "year"), all.x=TRUE, all.y=FALSE)
Sales$HCPI_cat<-Sales$hcpi
Sales$hcpi<-NULL
Sales$month<-NULL
Sales$year<-NULL

Sales$price<-as.numeric(Sales$price)
Sales$rprice<-Sales$price/(Sales$HCPI_res/100)
Sales$rcatalogusprijs<-Sales$catalogusprijs/(Sales$HCPI_cat/100)
Sales$rdeprrate<-Sales$rprice/Sales$rcatalogusprijs

table(is.na(Sales$referentiecoderdw))

Sales$recalled<-0
Sales$recalled[is.na(Sales$referentiecoderdw)==FALSE]<-1

Sales$recalledpost<-0
i<-Sales$recalled==1 & Sales$date>Sales$recall_new
Sales$recalledpost[i]<-1

#
#Statistics on sales date
Temp<-subset(Sales, date<"2018-06-01")
Temp$distsale<-NA
for(i in 1:(nrow(Temp))){
  dates<-unlist(strsplit(Temp$datumtenaamstelling_all[i], ";"))
  #Get difference btw date in database and nearest change in registration
  distsale<-as.numeric(as.Date(dates, format="%d/%m/%Y")-Temp$date[i])  
  #Remove changes in registration before put on market
  distsale<-distsale[which(distsale>=0)]
  #Get closest change in registration
  if(length(distsale)>0){
    mindist<-min(distsale)
    #print(mindist)
    Temp$distsale[i]<-mindist
    rm(mindist)
  }
  rm(dates, distsale)
}
summary(Temp$distsale)
table(Temp$recalledpost)
summary(Temp$distsale[Temp$recalledpost==0])
summary(Temp$distsale[Temp$recalledpost==1])

Sales$id_ver<-as.factor(paste(Sales$typegoedkeuringsnummer,Sales$variant, Sales$uitvoering))

Sales$id_ver_rec<-as.factor(paste(Sales$typegoedkeuringsnummer,Sales$variant, Sales$uitvoering, Sales$recalled))
Sales$id_type<-as.factor(Sales$typegoedkeuringsnummer)

Sales$id_type_date<-as.factor(paste(Sales$typegoedkeuringsnummer, Sales$date1) )
Sales$age<-floor(((year(Sales$date)*12+month(Sales$date))- (year(Sales$datumeerstetoelating)*12+month(Sales$datumeerstetoelating)))/12) 
Sales$age_d<-as.factor(Sales$age)



Sales$date1<-as.factor(as.Date(paste(year(Sales$date), month(Sales$date), 1, sep="-" )))
Sales$id_ver_time<-as.factor(paste(Sales$typegoedkeuringsnummer,Sales$variant, Sales$uitvoering, Sales$date1))

Sales$km<-as.numeric(Sales$km)
Sales$km2<-Sales$km^2

Sales$km_d<-as.factor(round(Sales$km/10000))

Sales$vend_dealer<-0
Sales$vend_dealer[str_detect(Sales$vendor_data, "</span><span>Autobedrijf</span>")]<-1

Sales$zip<-str_extract(Sales$vendor_city, "^[[:digit:]]{4}")
Sales$zip<-as.factor(Sales$zip)
#
Sales1<-subset(Sales, is.na(rdeprrate)==FALSE)

# TABLE G.1 - Effect of recalls on depreciation rate
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Sales1))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Sales1))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Sales1))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Sales1))

#Table G.2: Depreciation factor results by real vehicle listed price, separate regressions
Salesdplow<-subset(Sales1, rcatalogusprijs<30000)
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Salesdplow))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Salesdplow))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Salesdplow))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Salesdplow))


Salesdphigh<-subset(Sales1, rcatalogusprijs>=30000)
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Salesdphigh))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Salesdphigh))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Salesdphigh))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Salesdphigh))

#Table G.3: Depreciation factor results by inspection defects, separate regressions
load("APKtot_recent.RData")

APKtot_recent<-subset(APKtot_recent, kenteken %in% Sales$kenteken)

#Create a date with the day=1 for merge
Sales1$date1<-as.Date(paste(1, month(Sales1$date), year(Sales1$date), sep="/"), format("%d/%m/%Y") )

Sales1<-merge(Sales1, APKtot_recent, by.x=c("kenteken", "date1"), by.y=c("kenteken", "date"), all.x=TRUE, all.y=FALSE)
rm(APKtot_recent)
gc()

Sales1$APKtot[is.na(Sales1$APKtot)==TRUE]<-0
Sales2<-subset(Sales1, is.na(APKtot)==FALSE & APKtot==0 & age>=4)
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Sales2))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Sales2))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Sales2))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Sales2))

Sales2<-subset(Sales1, is.na(APKtot)==FALSE & APKtot>=1 & age>=4)
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Sales2))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Sales2))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Sales2))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Sales2))

#Table G.4: Depreciation factor results by brand reputation, separate regressions

Sales3<-subset(Sales1, is.na(rating_merk)==FALSE & rating_merk<7)
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Sales3))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Sales3))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Sales3))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Sales3))
Sales3<-subset(Sales1, is.na(rating_merk)==FALSE & rating_merk>=7)
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + date1 + age_d | id_ver  + zip | 0 | id_type_date, data = Sales3))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d + date1| id_ver_rec  + zip | 0 | id_type_date, data = Sales3))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_time  + zip | 0 | id_type_date, data = Sales3))
summary(felm(rdeprrate ~ recalledpost+ recalled + km + km2 + vend_dealer + age_d| id_ver_rec +id_ver_time  + zip | 0 | id_type_date, data = Sales3))


